summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/i2c/ce4100-i2c.txt93
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-cmos.txt28
-rw-r--r--Documentation/devicetree/bindings/x86/ce4100.txt38
-rw-r--r--Documentation/devicetree/bindings/x86/interrupt.txt26
-rw-r--r--Documentation/devicetree/bindings/x86/timer.txt6
-rw-r--r--Documentation/devicetree/booting-without-of.txt20
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/rtc.txt29
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/futex.h29
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_titan.c1
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/arm/include/asm/futex.h29
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/frv/include/asm/futex.h5
-rw-r--r--arch/frv/kernel/futex.c14
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/futex.h15
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/microblaze/include/asm/futex.h31
-rw-r--r--arch/microblaze/include/asm/pci-bridge.h12
-rw-r--r--arch/microblaze/include/asm/prom.h15
-rw-r--r--arch/microblaze/kernel/prom_parse.c77
-rw-r--r--arch/microblaze/pci/pci-common.c1
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/futex.h39
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/parisc/include/asm/futex.h24
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/powerpc/include/asm/futex.h27
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h10
-rw-r--r--arch/powerpc/include/asm/prom.h15
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/prom_parse.c84
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/s390/include/asm/futex.h12
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/lib/uaccess.h8
-rw-r--r--arch/s390/lib/uaccess_pt.c17
-rw-r--r--arch/s390/lib/uaccess_std.c8
-rw-r--r--arch/sh/include/asm/futex-irq.h24
-rw-r--r--arch/sh/include/asm/futex.h11
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sparc/include/asm/futex_64.h20
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/tile/include/asm/futex.h27
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/Kconfig.x865
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/ia32/ia32entry.S30
-rw-r--r--arch/x86/include/asm/acpi.h10
-rw-r--r--arch/x86/include/asm/amd_nb.h14
-rw-r--r--arch/x86/include/asm/apic.h42
-rw-r--r--arch/x86/include/asm/apicdef.h12
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/frame.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/io_apic.h44
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/numa.h52
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h23
-rw-r--r--arch/x86/include/asm/olpc_ofw.h14
-rw-r--r--arch/x86/include/asm/page_types.h9
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/prom.h70
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h10
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/topology.h19
-rw-r--r--arch/x86/include/asm/unistd_32.h5
-rw-r--r--arch/x86/include/asm/unistd_64.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/apb_timer.c60
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c150
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c388
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/devicetree.c441
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kernel/entry_32.S5
-rw-r--r--arch/x86/kernel/entry_64.S7
-rw-r--r--arch/x86/kernel/head_32.S10
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/irq.c91
-rw-r--r--arch/x86/kernel/irqinit.c92
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c76
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c124
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology_64.c142
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init.c56
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/init_64.c78
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c988
-rw-r--r--arch/x86/mm/numa_emulation.c494
-rw-r--r--arch/x86/mm/numa_internal.h31
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c367
-rw-r--r--arch/x86/mm/tlb.c14
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/ce4100.c9
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c26
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts428
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/mrst/vrtc.c16
-rw-r--r--arch/x86/platform/olpc/Makefile4
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/platform/uv/uv_irq.c4
-rw-r--r--arch/x86/platform/visws/visws_quirks.c4
-rw-r--r--arch/x86/xen/Kconfig8
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c84
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
-rw-r--r--block/blk-lib.c19
-rw-r--r--drivers/acpi/numa.c9
-rw-r--r--drivers/block/xen-blkfront.c87
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c8
-rw-r--r--drivers/char/mmtimer.c30
-rw-r--r--drivers/gpio/ml_ioh_gpio.c1
-rw-r--r--drivers/gpio/pch_gpio.c1
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h10
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c36
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/r100.c22
-rw-r--r--drivers/gpu/drm/radeon/r600.c3
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c14
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv770.c3
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/i2c/busses/i2c-eg20t.c1
-rw-r--r--drivers/i2c/busses/i2c-ocores.c16
-rw-r--r--drivers/i2c/busses/i2c-omap.c4
-rw-r--r--drivers/i2c/i2c-core.c2
-rw-r--r--drivers/media/common/tuners/tda8290.c14
-rw-r--r--drivers/media/dvb/dvb-usb/dib0700_devices.c21
-rw-r--r--drivers/media/dvb/dvb-usb/lmedm04.c6
-rw-r--r--drivers/media/dvb/frontends/dib7000m.c19
-rw-r--r--drivers/media/dvb/frontends/dib7000m.h15
-rw-r--r--drivers/media/dvb/mantis/mantis_pci.c1
-rw-r--r--drivers/media/rc/ir-raw.c3
-rw-r--r--drivers/media/rc/mceusb.c27
-rw-r--r--drivers/media/rc/nuvoton-cir.c5
-rw-r--r--drivers/media/rc/nuvoton-cir.h7
-rw-r--r--drivers/media/rc/rc-main.c2
-rw-r--r--drivers/media/video/au0828/au0828-video.c28
-rw-r--r--drivers/media/video/cx18/cx18-cards.c50
-rw-r--r--drivers/media/video/cx18/cx18-driver.c25
-rw-r--r--drivers/media/video/cx18/cx18-driver.h3
-rw-r--r--drivers/media/video/cx18/cx18-dvb.c38
-rw-r--r--drivers/media/video/cx23885/cx23885-i2c.c10
-rw-r--r--drivers/media/video/cx25840/cx25840-core.c3
-rw-r--r--drivers/media/video/ivtv/ivtv-irq.c58
-rw-r--r--drivers/media/video/mem2mem_testdev.c1
-rw-r--r--drivers/media/video/s2255drv.c10
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mmc/host/mmc_spi.c4
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c43
-rw-r--r--drivers/mtd/chips/jedec_probe.c35
-rw-r--r--drivers/mtd/maps/amd76xrom.c1
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/omap2.c2
-rw-r--r--drivers/mtd/onenand/generic.c2
-rw-r--r--drivers/mtd/onenand/omap2.c2
-rw-r--r--drivers/net/ariadne.c5
-rw-r--r--drivers/net/bnx2x/bnx2x.h5
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c22
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.h9
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c18
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c19
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h3
-rw-r--r--drivers/net/ethoc.c8
-rw-r--r--drivers/net/macvtap.c3
-rw-r--r--drivers/net/r6040.c115
-rw-r--r--drivers/net/smsc911x.c5
-rw-r--r--drivers/of/Kconfig6
-rw-r--r--drivers/of/Makefile1
-rw-r--r--drivers/of/of_pci.c92
-rw-r--r--drivers/pci/xen-pcifront.c31
-rw-r--r--drivers/rtc/class.c7
-rw-r--r--drivers/rtc/interface.c180
-rw-r--r--drivers/rtc/rtc-at91rm9200.c28
-rw-r--r--drivers/rtc/rtc-at91sam9.c28
-rw-r--r--drivers/rtc/rtc-bfin.c27
-rw-r--r--drivers/rtc/rtc-cmos.c111
-rw-r--r--drivers/rtc/rtc-davinci.c55
-rw-r--r--drivers/rtc/rtc-ds1511.c17
-rw-r--r--drivers/rtc/rtc-ds1553.c17
-rw-r--r--drivers/rtc/rtc-ds3232.c18
-rw-r--r--drivers/rtc/rtc-jz4740.c7
-rw-r--r--drivers/rtc/rtc-mc13xxx.c7
-rw-r--r--drivers/rtc/rtc-mpc5121.c20
-rw-r--r--drivers/rtc/rtc-mrst.c33
-rw-r--r--drivers/rtc/rtc-mxc.c7
-rw-r--r--drivers/rtc/rtc-nuc900.c15
-rw-r--r--drivers/rtc/rtc-omap.c39
-rw-r--r--drivers/rtc/rtc-pcap.c6
-rw-r--r--drivers/rtc/rtc-pcf50633.c22
-rw-r--r--drivers/rtc/rtc-pl030.c6
-rw-r--r--drivers/rtc/rtc-pl031.c55
-rw-r--r--drivers/rtc/rtc-proc.c8
-rw-r--r--drivers/rtc/rtc-pxa.c44
-rw-r--r--drivers/rtc/rtc-rs5c372.c52
-rw-r--r--drivers/rtc/rtc-rx8025.c25
-rw-r--r--drivers/rtc/rtc-s3c.c33
-rw-r--r--drivers/rtc/rtc-sa1100.c160
-rw-r--r--drivers/rtc/rtc-sh.c24
-rw-r--r--drivers/rtc/rtc-stmp3xxx.c15
-rw-r--r--drivers/rtc/rtc-test.c13
-rw-r--r--drivers/rtc/rtc-twl.c13
-rw-r--r--drivers/rtc/rtc-vr41xx.c32
-rw-r--r--drivers/rtc/rtc-wm831x.c16
-rw-r--r--drivers/rtc/rtc-wm8350.c21
-rw-r--r--drivers/spi/pxa2xx_spi.c2
-rw-r--r--drivers/spi/pxa2xx_spi_pci.c2
-rw-r--r--drivers/spi/xilinx_spi.c6
-rw-r--r--drivers/target/target_core_tmr.c5
-rw-r--r--drivers/target/target_core_transport.c8
-rw-r--r--drivers/watchdog/cpwd.c2
-rw-r--r--drivers/watchdog/hpwdt.c4
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c7
-rw-r--r--drivers/watchdog/sch311x_wdt.c2
-rw-r--r--drivers/watchdog/w83697ug_wdt.c2
-rw-r--r--drivers/xen/balloon.c16
-rw-r--r--drivers/xen/events.c342
-rw-r--r--drivers/xen/manage.c153
-rw-r--r--drivers/xen/platform-pci.c3
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/internal.h13
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/namei.c1490
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c91
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/open.c134
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--include/asm-generic/cputime.h3
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/futex.h7
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/linux/debugobjects.h5
-rw-r--r--include/linux/device.h7
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/fs.h19
-rw-r--r--include/linux/hrtimer.h24
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/interrupt.h85
-rw-r--r--include/linux/irq.h368
-rw-r--r--include/linux/irqdesc.h78
-rw-r--r--include/linux/jiffies.h1
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/nfs_fs_sb.h10
-rw-r--r--include/linux/of.h16
-rw-r--r--include/linux/of_pci.h9
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/plist.h47
-rw-r--r--include/linux/posix-clock.h150
-rw-r--r--include/linux/posix-timers.h44
-rw-r--r--include/linux/rtc.h5
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h15
-rw-r--r--include/linux/security.h9
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/sunrpc/sched.h1
-rw-r--r--include/linux/syscalls.h10
-rw-r--r--include/linux/sysctl.h14
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--include/linux/time.h14
-rw-r--r--include/linux/timex.h3
-rw-r--r--include/target/target_core_transport.h2
-rw-r--r--include/xen/events.h8
-rw-r--r--include/xen/interface/io/blkif.h37
-rw-r--r--include/xen/interface/xen.h4
-rw-r--r--include/xen/xen-ops.h6
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/compat.c136
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c147
-rw-r--r--kernel/hrtimer.c90
-rw-r--r--kernel/irq/Kconfig39
-rw-r--r--kernel/irq/autoprobe.c54
-rw-r--r--kernel/irq/chip.c483
-rw-r--r--kernel/irq/compat.h72
-rw-r--r--kernel/irq/debug.h40
-rw-r--r--kernel/irq/handle.c144
-rw-r--r--kernel/irq/internals.h167
-rw-r--r--kernel/irq/irqdesc.c68
-rw-r--r--kernel/irq/manage.c602
-rw-r--r--kernel/irq/migration.c38
-rw-r--r--kernel/irq/pm.c30
-rw-r--r--kernel/irq/proc.c70
-rw-r--r--kernel/irq/resend.c17
-rw-r--r--kernel/irq/settings.h138
-rw-r--r--kernel/irq/spurious.c163
-rw-r--r--kernel/posix-cpu-timers.c110
-rw-r--r--kernel/posix-timers.c342
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c40
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c302
-rw-r--r--kernel/sched_autogroup.c15
-rw-r--r--kernel/sched_autogroup.h5
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c397
-rw-r--r--kernel/sched_idletask.c26
-rw-r--r--kernel/sched_rt.c33
-rw-r--r--kernel/sched_stoptask.c7
-rw-r--r--kernel/softirq.c24
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c24
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--kernel/time.c35
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/jiffies.c20
-rw-r--r--kernel/time/ntp.c13
-rw-r--r--kernel/time/posix-clock.c451
-rw-r--r--kernel/time/tick-broadcast.c1
-rw-r--r--kernel/time/tick-common.c1
-rw-r--r--kernel/time/tick-internal.h9
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/time/timekeeping.c141
-rw-r--r--kernel/timer.c42
-rw-r--r--kernel/workqueue.c6
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/plist.c135
-rw-r--r--lib/rwsem.c10
-rw-r--r--mm/Makefile8
-rw-r--r--mm/bootmem.c180
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/nobootmem.c435
-rw-r--r--mm/page_alloc.c71
-rw-r--r--mm/rmap.c54
-rw-r--r--mm/shmem.c4
-rw-r--r--net/Makefile4
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/scm.c2
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/sunrpc/sched.c75
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c2
-rw-r--r--scripts/basic/fixdep.c19
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--scripts/mod/sumversion.c19
-rw-r--r--scripts/rt-tester/rt-tester.py2
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst5
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst5
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst5
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst5
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst5
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst5
-rw-r--r--security/commoncap.c2
-rw-r--r--security/security.c2
-rw-r--r--sound/soc/codecs/wm8978.c14
-rw-r--r--sound/soc/codecs/wm8994.c10
-rw-r--r--sound/soc/omap/am3517evm.c2
-rw-r--r--sound/soc/soc-dapm.c2
-rw-r--r--tools/perf/util/header.c11
-rw-r--r--tools/perf/util/probe-finder.c2
579 files changed, 13309 insertions, 9481 deletions
diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
new file mode 100644
index 00000000000..569b1624851
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
@@ -0,0 +1,93 @@
+CE4100 I2C
+----------
+
+CE4100 has one PCI device which is described as the I2C-Controller. This
+PCI device has three PCI-bars, each bar contains a complete I2C
+controller. So we have a total of three independent I2C-Controllers
+which share only an interrupt line.
+The driver is probed via the PCI-ID and is gathering the information of
+attached devices from the devices tree.
+Grant Likely recommended to use the ranges property to map the PCI-Bar
+number to its physical address and to use this to find the child nodes
+of the specific I2C controller. This were his exact words:
+
+ Here's where the magic happens. Each entry in
+ ranges describes how the parent pci address space
+ (middle group of 3) is translated to the local
+ address space (first group of 2) and the size of
+ each range (last cell). In this particular case,
+ the first cell of the local address is chosen to be
+ 1:1 mapped to the BARs, and the second is the
+ offset from be base of the BAR (which would be
+ non-zero if you had 2 or more devices mapped off
+ the same BAR)
+
+ ranges allows the address mapping to be described
+ in a way that the OS can interpret without
+ requiring custom device driver code.
+
+This is an example which is used on FalconFalls:
+------------------------------------------------
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+
+ /* as described by Grant, the first number in the group of
+ * three is the bar number followed by the 64bit bar address
+ * followed by size of the mapping. The bar address
+ * requires also a valid translation in parents ranges
+ * property.
+ */
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+
+ /* The first number in the reg property is the
+ * number of the bar
+ */
+ reg = <0 0 0x100>;
+
+ /* This I2C controller has no devices */
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ /* This I2C controller has one gpio controller */
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-cmos.txt b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
new file mode 100644
index 00000000000..7382989b305
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
@@ -0,0 +1,28 @@
+ Motorola mc146818 compatible RTC
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Required properties:
+ - compatible : "motorola,mc146818"
+ - reg : should contain registers location and length.
+
+Optional properties:
+ - interrupts : should contain interrupt.
+ - interrupt-parent : interrupt source phandle.
+ - ctrl-reg : Contains the initial value of the control register also
+ called "Register B".
+ - freq-reg : Contains the initial value of the frequency register also
+ called "Regsiter A".
+
+"Register A" and "B" are usually initialized by the firmware (BIOS for
+instance). If this is not done, it can be performed by the driver.
+
+ISA Example:
+
+ rtc@70 {
+ compatible = "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
diff --git a/Documentation/devicetree/bindings/x86/ce4100.txt b/Documentation/devicetree/bindings/x86/ce4100.txt
new file mode 100644
index 00000000000..b49ae593a60
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/ce4100.txt
@@ -0,0 +1,38 @@
+CE4100 Device Tree Bindings
+---------------------------
+
+The CE4100 SoC uses for in core peripherals the following compatible
+format: <vendor>,<chip>-<device>.
+Many of the "generic" devices like HPET or IO APIC have the ce4100
+name in their compatible property because they first appeared in this
+SoC.
+
+The CPU node
+------------
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+
+The reg property describes the CPU number. The lapic property points to
+the local APIC timer.
+
+The SoC node
+------------
+
+This node describes the in-core peripherals. Required property:
+ compatible = "intel,ce4100-cp";
+
+The PCI node
+------------
+This node describes the PCI bus on the SoC. Its property should be
+ compatible = "intel,ce4100-pci", "pci";
+
+If the OS is using the IO-APIC for interrupt routing then the reported
+interrupt numbers for devices is no longer true. In order to obtain the
+correct interrupt number, the child node which represents the device has
+to contain the interrupt property. Besides the interrupt property it has
+to contain at least the reg property containing the PCI bus address and
+compatible property according to "PCI Bus Binding Revision 2.1".
diff --git a/Documentation/devicetree/bindings/x86/interrupt.txt b/Documentation/devicetree/bindings/x86/interrupt.txt
new file mode 100644
index 00000000000..7d19f494f19
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/interrupt.txt
@@ -0,0 +1,26 @@
+Interrupt chips
+---------------
+
+* Intel I/O Advanced Programmable Interrupt Controller (IO APIC)
+
+ Required properties:
+ --------------------
+ compatible = "intel,ce4100-ioapic";
+ #interrupt-cells = <2>;
+
+ Device's interrupt property:
+
+ interrupts = <P S>;
+
+ The first number (P) represents the interrupt pin which is wired to the
+ IO APIC. The second number (S) represents the sense of interrupt which
+ should be configured and can be one of:
+ 0 - Edge Rising
+ 1 - Level Low
+ 2 - Level High
+ 3 - Edge Falling
+
+* Local APIC
+ Required property:
+
+ compatible = "intel,ce4100-lapic";
diff --git a/Documentation/devicetree/bindings/x86/timer.txt b/Documentation/devicetree/bindings/x86/timer.txt
new file mode 100644
index 00000000000..c688af58e3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/timer.txt
@@ -0,0 +1,6 @@
+Timers
+------
+
+* High Precision Event Timer (HPET)
+ Required property:
+ compatible = "intel,ce4100-hpet";
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 28b1c9d3d35..55fd2623445 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -13,6 +13,7 @@ Table of Contents
I - Introduction
1) Entry point for arch/powerpc
+ 2) Entry point for arch/x86
II - The DT block format
1) Header
@@ -225,6 +226,25 @@ it with special cases.
cannot support both configurations with Book E and configurations
with classic Powerpc architectures.
+2) Entry point for arch/x86
+-------------------------------
+
+ There is one single 32bit entry point to the kernel at code32_start,
+ the decompressor (the real mode entry point goes to the same 32bit
+ entry point once it switched into protected mode). That entry point
+ supports one calling convention which is documented in
+ Documentation/x86/boot.txt
+ The physical pointer to the device-tree block (defined in chapter II)
+ is passed via setup_data which requires at least boot protocol 2.09.
+ The type filed is defined as
+
+ #define SETUP_DTB 2
+
+ This device-tree is used as an extension to the "boot page". As such it
+ does not parse / consider data which is already covered by the boot
+ page. This includes memory size, reserved ranges, command line arguments
+ or initrd address. It simply holds information which can not be retrieved
+ otherwise like interrupt routing or a list of devices behind an I2C bus.
II - The DT block format
========================
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f4a04c0c7ed..738c6fda3fb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2444,6 +2444,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
<deci-seconds>: poll all this frequency
0: no polling (default)
+ threadirqs [KNL]
+ Force threading of all interrupt handlers except those
+ marked explicitely IRQF_NO_THREAD.
+
topology= [S390]
Format: {off | on}
Specify if the kernel should make use of the cpu
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 9104c106208..250160469d8 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -178,38 +178,29 @@ RTC class framework, but can't be supported by the older driver.
setting the longer alarm time and enabling its IRQ using a single
request (using the same model as EFI firmware).
- * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, it probably
- also offers update IRQs whenever the "seconds" counter changes.
- If needed, the RTC framework can emulate this mechanism.
+ * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, the RTC framework
+ will emulate this mechanism.
- * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... another
- feature often accessible with an IRQ line is a periodic IRQ, issued
- at settable frequencies (usually 2^N Hz).
+ * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... these icotls
+ are emulated via a kernel hrtimer.
In many cases, the RTC alarm can be a system wake event, used to force
Linux out of a low power sleep state (or hibernation) back to a fully
operational state. For example, a system could enter a deep power saving
state until it's time to execute some scheduled tasks.
-Note that many of these ioctls need not actually be implemented by your
-driver. The common rtc-dev interface handles many of these nicely if your
-driver returns ENOIOCTLCMD. Some common examples:
+Note that many of these ioctls are handled by the common rtc-dev interface.
+Some common examples:
* RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be
called with appropriate values.
- * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: the
- set_alarm/read_alarm functions will be called.
+ * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: gets or sets
+ the alarm rtc_timer. May call the set_alarm driver function.
- * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called
- to set the frequency while the framework will handle the read for you
- since the frequency is stored in the irq_freq member of the rtc_device
- structure. Your driver needs to initialize the irq_freq member during
- init. Make sure you check the requested frequency is in range of your
- hardware in the irq_set_freq function. If it isn't, return -EINVAL. If
- you cannot actually change the frequency, do not define irq_set_freq.
+ * RTC_IRQP_SET, RTC_IRQP_READ: These are emulated by the generic code.
- * RTC_PIE_ON, RTC_PIE_OFF: the irq_set_state function will be called.
+ * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
If all else fails, check out the rtc-test.c driver!
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907..2e3c64b1a6a 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
The routines look the same as above:
- rwlock_t xxx_lock = RW_LOCK_UNLOCKED;
+ rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
unsigned long flags;
@@ -196,25 +196,3 @@ appropriate:
For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
-
-SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
-with lockdep state tracking.
-
-Most of the time, you can simply turn:
- static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
-into:
- static DEFINE_SPINLOCK(xxx_lock);
-
-Static structure member variables go from:
-
- struct foo bar {
- .lock = SPIN_LOCK_UNLOCKED;
- };
-
-to:
-
- struct foo bar {
- .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
- };
-
-Declaration of static rw_locks undergo a similar transformation.
diff --git a/MAINTAINERS b/MAINTAINERS
index 560ecce38ff..f1bc3dc6b36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4292,10 +4292,7 @@ S: Maintained
F: net/sched/sch_netem.c
NETERION 10GbE DRIVERS (s2io/vxge)
-M: Ramkrishna Vepa <ramkrishna.vepa@exar.com>
-M: Sivakumar Subramani <sivakumar.subramani@exar.com>
-M: Sreenivasa Honnur <sreenivasa.honnur@exar.com>
-M: Jon Mason <jon.mason@exar.com>
+M: Jon Mason <jdmason@kudzu.us>
L: netdev@vger.kernel.org
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
diff --git a/Makefile b/Makefile
index 504f788773e..d6592b63c8c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 38
-EXTRAVERSION = -rc8
+EXTRAVERSION =
NAME = Flesh-Eating Bats with Fangs
# *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index 945de222ab9..e8a761aee08 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -29,7 +29,7 @@
: "r" (uaddr), "r"(oparg) \
: "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev, cmp;
+ int ret = 0, cmp;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ (
__ASM_SMP_MB
- "1: ldl_l %0,0(%2)\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,3f\n"
- " mov %4,%1\n"
- "2: stl_c %1,0(%2)\n"
- " beq %1,4f\n"
+ "1: ldl_l %1,0(%3)\n"
+ " cmpeq %1,%4,%2\n"
+ " beq %2,3f\n"
+ " mov %5,%2\n"
+ "2: stl_c %2,0(%3)\n"
+ " beq %2,4f\n"
"3: .subsection 2\n"
"4: br 1b\n"
" .previous\n"
@@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .long 2b-.\n"
" lda $31,3b-2b(%0)\n"
" .previous\n"
- : "=&r"(prev), "=&r"(cmp)
+ : "+r"(ret), "=&r"(prev), "=&r"(cmp)
: "r"(uaddr), "r"((long)oldval), "r"(newval)
: "memory");
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b5433..a83bbea62c6 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
#ifdef __KERNEL__
#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
static inline void __down_read(struct rw_semaphore *sem)
{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e..376f2213079 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
}
-static int
-do_osf_statfs(struct path *path, struct osf_statfs __user *buffer,
- unsigned long bufsiz)
+SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
+ struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
struct kstatfs linux_stat;
- int error = vfs_statfs(path, &linux_stat);
+ int error = user_statfs(pathname, &linux_stat);
if (!error)
error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
return error;
}
-SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
- struct osf_statfs __user *, buffer, unsigned long, bufsiz)
-{
- struct path path;
- int retval;
-
- retval = user_path(pathname, &path);
- if (!retval) {
- retval = do_osf_statfs(&path, buffer, bufsiz);
- path_put(&path);
- }
- return retval;
-}
-
SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
- struct file *file;
- int retval;
-
- retval = -EBADF;
- file = fget(fd);
- if (file) {
- retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
- fput(file);
- }
- return retval;
+ struct kstatfs linux_stat;
+ int error = fd_statfs(fd, &linux_stat);
+ if (!error)
+ error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
+ return error;
}
/*
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index f6c108a3d67..8c13a0c7783 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -149,6 +149,7 @@ static int
titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
bool force)
{
+ unsigned int irq = d->irq;
spin_lock(&titan_irq_lock);
titan_cpu_set_irq_affinity(irq - 16, *affinity);
titan_update_irq_hw(titan_cached_irq_mask);
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a..a58e84f1a63 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t timer_interrupt(int irq, void *dev)
{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
profile_tick(CPU_PROFILING);
#endif
- write_seqlock(&xtime_lock);
-
/*
* Calculate how many ticks have passed since the last update,
* including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
nticks = delta >> FIX_SHIFT;
if (nticks)
- do_timer(nticks);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(nticks);
if (test_irq_work_pending()) {
clear_irq_work_pending();
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index b33fe7065b3..199a6b6de7f 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -35,7 +35,7 @@
: "cc", "memory")
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable(); /* implies preempt_disable() */
@@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int val;
+ int ret = 0;
+ u32 val;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
-
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
- "1: " T(ldr) " %0, [%3]\n"
- " teq %0, %1\n"
+ "1: " T(ldr) " %1, [%4]\n"
+ " teq %1, %2\n"
" it eq @ explicit IT needed for the 2b label\n"
- "2: " T(streq) " %2, [%3]\n"
+ "2: " T(streq) " %3, [%4]\n"
"3:\n"
" .pushsection __ex_table,\"a\"\n"
" .align 3\n"
" .long 1b, 4f, 2b, 4f\n"
" .popsection\n"
" .pushsection .fixup,\"ax\"\n"
- "4: mov %0, %4\n"
+ "4: mov %0, %5\n"
" b 3b\n"
" .popsection"
- : "=&r" (val)
+ : "+r" (ret), "=&r" (val)
: "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
: "cc", "memory");
- pagefault_enable(); /* subsumes preempt_enable() */
-
- return val;
+ *uval = val;
+ return ret;
}
#endif /* !SMP */
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf23373..1ff46cabc7e 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
{
profile_tick(CPU_PROFILING);
do_leds();
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f..61fef9129c6 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
{
struct pt_regs *regs = get_irq_regs();
do_leds();
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029..8d73724c009 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#ifdef CONFIG_CORE_TIMER_IRQ_L1
__attribute__((l1_text))
#endif
irqreturn_t timer_interrupt(int irq, void *dummy)
{
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifdef CONFIG_IPIPE
update_root_process_times(get_irq_regs());
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8deb..20c85b5dc7d 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
/* call the real timer interrupt handler */
- do_timer(1);
+ xtime_update(1);
cris_do_profile(regs); /* Save profiling information */
return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 84fed3b4b07..4c9e3e1ba5d 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -26,7 +26,9 @@
#define FLUSH_ALL (void*)0xffffffff
/* Vector of locks used for various atomic operations */
-spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
+spinlock_t cris_atomic_locks[] = {
+ [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
+};
/* CPU masks */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999..bb978ede898 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick.
+ * as well as call the "xtime_update()" routine every clocktick.
*/
extern void cris_do_profile(struct pt_regs *regs);
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
/* Call the real timer interrupt handler */
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
return IRQ_HANDLED;
}
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 08b3d1da358..4bea27f50a7 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,10 +7,11 @@
#include <asm/errno.h>
#include <asm/uaccess.h>
-extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr);
+extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
return -ENOSYS;
}
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index 14f64b054c7..d155ca9e509 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -18,7 +18,7 @@
* the various futex operations; MMU fault checking is ignored under no-MMU
* conditions
*/
-static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol
return ret;
}
-static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o
/*
* do the futex operations
*/
-int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb..b457de496b7 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
profile_tick(CPU_PROFILING);
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don't need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
+ xtime_update(1);
#ifdef CONFIG_HEARTBEAT
static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
__set_LEDS(n);
#endif /* CONFIG_HEARTBEAT */
- write_sequnlock(&xtime_lock);
-
update_process_times(user_mode(get_irq_regs()));
return IRQ_HANDLED;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9d..32263a138aa 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
{
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
}
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa837..7a1533fad47 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index c7f0f062239..8428525ddb2 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -46,7 +46,7 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
{
- register unsigned long r8 __asm ("r8");
+ register unsigned long r8 __asm ("r8") = 0;
+ unsigned long prev;
__asm__ __volatile__(
" mf;; \n"
" mov ar.ccv=%3;; \n"
"[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n"
" .xdata4 \"__ex_table\", 1b-., 2f-. \n"
"[2:]"
- : "=r" (r8)
+ : "=r" (prev)
: "r" (uaddr), "r" (newval),
"rO" ((long) (unsigned) oldval)
: "memory");
+ *uval = prev;
return r8;
}
}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d..3027e7516d8 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
#include <asm/intrinsics.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void
-init_rwsem (struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
/*
* lock for reading
*/
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc62366aa..ed28bcd5bb8 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
static inline int
xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489..156ad803d5b 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ if (smp_processor_id() == time_keeper_id)
+ xtime_update(1);
+
+ local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
+ * xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b048c6f..419c8620945 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
/* nothing */
}
-void xen_pre_device_suspend(void)
-{
- /* nothing */
-}
-
void
-xen_pre_suspend()
+xen_arch_pre_suspend()
{
/* nothing */
}
void
-xen_post_suspend(int suspend_cancelled)
+xen_arch_post_suspend(int suspend_cancelled)
{
if (suspend_cancelled)
return;
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8..1f8244a78be 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
run_posix_cpu_timers(p);
delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
- if (cpu == time_keeper_id) {
- write_seqlock(&xtime_lock);
- do_timer(stolen + blocked);
- local_cpu_data->itm_next = delta_itm + new_itm;
- write_sequnlock(&xtime_lock);
- } else {
- local_cpu_data->itm_next = delta_itm + new_itm;
- }
+ if (cpu == time_keeper_id)
+ xtime_update(stolen + blocked);
+
+ local_cpu_data->itm_next = delta_itm + new_itm;
+
per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
}
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bff..84dd04048db 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
#ifndef CONFIG_SMP
profile_tick(CPU_PROFILING);
#endif
- /* XXX FIXME. Uh, the xtime_lock should be held here, no? */
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e1..1edd95095cb 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via bvme6000_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08f..18b34ee5db3 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a..6cb9c3a9b6c 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme147_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme147_process_int() */
irq_handler_t tick_handler;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c..0b28e262165 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme16x_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme16x_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313..6464ad3ae3e 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
#ifdef CONFIG_SUN3
intersil_clear();
#endif
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
if (!(kstat_cpu(0).irqs[irq] % 20))
sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453..6623909f70e 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t arch_timer_interrupt(int irq, void *dummy)
{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
-
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index ad3fd61b2fe..b0526d2716f 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,7 +29,7 @@
})
static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev, cmp;
+ int ret = 0, cmp;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- __asm__ __volatile__ ("1: lwx %0, %2, r0; \
- cmp %1, %0, %3; \
- beqi %1, 3f; \
- 2: swx %4, %2, r0; \
- addic %1, r0, 0; \
- bnei %1, 1b; \
+ __asm__ __volatile__ ("1: lwx %1, %3, r0; \
+ cmp %2, %1, %4; \
+ beqi %2, 3f; \
+ 2: swx %5, %3, r0; \
+ addic %2, r0, 0; \
+ bnei %2, 1b; \
3: \
.section .fixup,\"ax\"; \
4: brid 3b; \
- addik %0, r0, %5; \
+ addik %0, r0, %6; \
.previous; \
.section __ex_table,\"a\"; \
.word 1b,4b,2b,4b; \
.previous;" \
- : "=&r" (prev), "=&r"(cmp) \
+ : "+r" (ret), "=&r" (prev), "=&r"(cmp) \
: "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT));
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/microblaze/include/asm/pci-bridge.h b/arch/microblaze/include/asm/pci-bridge.h
index 0c68764ab54..10717669e0c 100644
--- a/arch/microblaze/include/asm/pci-bridge.h
+++ b/arch/microblaze/include/asm/pci-bridge.h
@@ -104,11 +104,22 @@ struct pci_controller {
int global_number; /* PCI domain number */
};
+#ifdef CONFIG_PCI
static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
{
return bus->sysdata;
}
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ struct pci_controller *host;
+
+ if (bus->self)
+ return pci_device_to_OF_node(bus->self);
+ host = pci_bus_to_host(bus);
+ return host ? host->dn : NULL;
+}
+
static inline int isa_vaddr_is_ioport(void __iomem *address)
{
/* No specific ISA handling on ppc32 at this stage, it
@@ -116,6 +127,7 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
*/
return 0;
}
+#endif /* CONFIG_PCI */
/* These are used for config access before all the PCI probing
has been done. */
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 2e72af078b0..d0890d36ef6 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -64,21 +64,6 @@ extern void kdump_move_device_tree(void);
/* CPU OF node matching */
struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 9ae24f4b882..47187cc2cf0 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -2,88 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn)
- return of_irq_map_one(dn, 0, out_irq);
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't
- * include the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index e363615d679..1e01a125363 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <asm/processor.h>
#include <asm/io.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc..d88983516e2 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
select ARCH_REQUIRE_GPIOLIB
select SYS_HAS_EARLY_PRINTK
select HAVE_PWM
+ select HAVE_CLK
config LASAT
bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
config PMC_MSP
bool "PMC-Sierra MSP chipsets"
depends on EXPERIMENTAL
+ select CEVT_R4K
+ select CSRC_R4K
select DMA_NONCOHERENT
select SWAP_IO_SPACE
select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa95905..40b84b99119 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
static void mtx1_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42add69..956f946218c 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
#include <linux/mtd/physmap.h>
#include <mtd/mtd-abi.h>
+#include <asm/mach-au1x00/au1xxx_eth.h>
+
static struct gpio_keys_button mtx1_gpio_button[] = {
{
.gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
&mtx1_mtd,
};
+static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
+ .phy_search_highest_addr = 1,
+ .phy1_search_mac0 = 1,
+};
+
static int __init mtx1_register_devices(void)
{
int rc;
+ au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
+
rc = gpio_request(mtx1_gpio_button[0].gpio,
mtx1_gpio_button[0].desc);
if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918925d..80c521e5290 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
static void xxs1500_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b9cce90346c..6ebf1734b41 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -75,7 +75,7 @@
}
static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int retval;
+ int ret = 0;
+ u32 val;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
if (cpu_has_llsc && R10000_LLSC_WAR) {
@@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .set push \n"
" .set noat \n"
" .set mips3 \n"
- "1: ll %0, %2 \n"
- " bne %0, %z3, 3f \n"
+ "1: ll %1, %3 \n"
+ " bne %1, %z4, 3f \n"
" .set mips0 \n"
- " move $1, %z4 \n"
+ " move $1, %z5 \n"
" .set mips3 \n"
- "2: sc $1, %1 \n"
+ "2: sc $1, %2 \n"
" beqzl $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
- "4: li %0, %5 \n"
+ "4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "=&r" (retval), "=R" (*uaddr)
+ : "+r" (ret), "=&r" (val), "=R" (*uaddr)
: "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
@@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .set push \n"
" .set noat \n"
" .set mips3 \n"
- "1: ll %0, %2 \n"
- " bne %0, %z3, 3f \n"
+ "1: ll %1, %3 \n"
+ " bne %1, %z4, 3f \n"
" .set mips0 \n"
- " move $1, %z4 \n"
+ " move $1, %z5 \n"
" .set mips3 \n"
- "2: sc $1, %1 \n"
+ "2: sc $1, %2 \n"
" beqz $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
- "4: li %0, %5 \n"
+ "4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "=&r" (retval), "=R" (*uaddr)
+ : "+r" (ret), "=&r" (val), "=R" (*uaddr)
: "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
: "memory");
} else
return -ENOSYS;
- return retval;
+ *uval = val;
+ return ret;
}
#endif
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007cf816..d0c77496c72 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
#ifndef __MIPS_PERF_EVENT_H__
#define __MIPS_PERF_EVENT_H__
-
-/*
- * MIPS performance counters do not raise NMI upon overflow, a regular
- * interrupt will be signaled. Hence we can do the pending perf event
- * work at the tail of the irq handler.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
+/* Leave it empty here. The file is required by linux/perf_event.h */
#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f1123..94ca2b018af 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
#include <asm/cacheflush.h>
#include <asm/uasm.h>
-/*
- * If the Instruction Pointer is in module space (0xc0000000), return true;
- * otherwise, it is in kernel space (0x80000000), return false.
- *
- * FIXME: This will not work when the kernel space and module space are the
- * same. If they are the same, we need to modify scripts/recordmcount.pl,
- * ftrace_make_nop/call() and the other related parts to ensure the
- * enabling/disabling of the calling site to _mcount is right for both kernel
- * and module.
- */
-
-static inline int in_module(unsigned long ip)
-{
- return ip & 0x40000000;
-}
+#include <asm-generic/sections.h>
#ifdef CONFIG_DYNAMIC_FTRACE
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
-#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
-#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
#define INSN_NOP 0x00000000 /* nop */
#define INSN_JAL(addr) \
((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
#endif
}
+/*
+ * Check if the address is in kernel space
+ *
+ * Clone core_kernel_text() from kernel/extable.c, but doesn't call
+ * init_kernel_text() for Ftrace doesn't trace functions in init sections.
+ */
+static inline int in_kernel_space(unsigned long ip)
+{
+ if (ip >= (unsigned long)_stext &&
+ ip <= (unsigned long)_etext)
+ return 1;
+ return 0;
+}
+
static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
{
int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
return 0;
}
+/*
+ * The details about the calling site of mcount on MIPS
+ *
+ * 1. For kernel:
+ *
+ * move at, ra
+ * jal _mcount --> nop
+ *
+ * 2. For modules:
+ *
+ * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * move $12, ra_address
+ * jalr v1
+ * sub sp, sp, 8
+ * 1: offset = 5 instructions
+ * 2.2 For the Other situations
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * jalr v1
+ * nop | move $12, ra_address | sub sp, sp, 8
+ * 1: offset = 4 instructions
+ */
+
+#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
+#define MCOUNT_OFFSET_INSNS 5
+#else
+#define MCOUNT_OFFSET_INSNS 4
+#endif
+#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
unsigned long ip = rec->ip;
/*
- * We have compiled module with -mlong-calls, but compiled the kernel
- * without it, we need to cope with them respectively.
+ * If ip is in kernel space, no long call, otherwise, long call is
+ * needed.
*/
- if (in_module(ip)) {
-#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * move $12, ra_address
- * jalr v1
- * sub sp, sp, 8
- * 1: offset = 5 instructions
- */
- new = INSN_B_1F_5;
-#else
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * jalr v1
- * nop | move $12, ra_address | sub sp, sp, 8
- * 1: offset = 4 instructions
- */
- new = INSN_B_1F_4;
-#endif
- } else {
- /*
- * move at, ra
- * jal _mcount --> nop
- */
- new = INSN_NOP;
- }
+ new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+
return ftrace_modify_code(ip, new);
}
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
unsigned int new;
unsigned long ip = rec->ip;
- /* ip, module: 0xc0000000, kernel: 0x80000000 */
- new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;
+ new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
+ insn_lui_v1_hi16_mcount;
return ftrace_modify_code(ip, new);
}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
-unsigned long ftrace_get_parent_addr(unsigned long self_addr,
- unsigned long parent,
- unsigned long parent_addr,
- unsigned long fp)
+unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
+ old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
{
- unsigned long sp, ip, ra;
+ unsigned long sp, ip, tmp;
unsigned int code;
int faulted;
/*
- * For module, move the ip from calling site of mcount to the
- * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for
- * kernel, move to the instruction "move ra, at"(offset is 12)
+ * For module, move the ip from the return address after the
+ * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
+ * kernel, move after the instruction "move ra, at"(offset is 16)
*/
- ip = self_addr - (in_module(self_addr) ? 20 : 12);
+ ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
/*
* search the text until finding the non-store instruction or "s{d,w}
* ra, offset(sp)" instruction
*/
do {
- ip -= 4;
-
/* get the code at "ip": code = *(unsigned int *)ip; */
safe_load_code(code, ip, faulted);
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* store the ra on the stack
*/
if ((code & S_R_SP) != S_R_SP)
- return parent_addr;
+ return parent_ra_addr;
- } while (((code & S_RA_SP) != S_RA_SP));
+ /* Move to the next instruction */
+ ip -= 4;
+ } while ((code & S_RA_SP) != S_RA_SP);
sp = fp + (code & OFFSET_MASK);
- /* ra = *(unsigned long *)sp; */
- safe_load_stack(ra, sp, faulted);
+ /* tmp = *(unsigned long *)sp; */
+ safe_load_stack(tmp, sp, faulted);
if (unlikely(faulted))
return 0;
- if (ra == parent)
+ if (tmp == old_parent_ra)
return sp;
return 0;
}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* Hook the return address and push it in the stack of return addrs
* in current thread info.
*/
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
- unsigned long old;
+ unsigned long old_parent_ra;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
- int faulted;
+ int faulted, insns;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
/*
- * "parent" is the stack address saved the return address of the caller
- * of _mcount.
+ * "parent_ra_addr" is the stack address saved the return address of
+ * the caller of _mcount.
*
* if the gcc < 4.5, a leaf function does not save the return address
* in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
* do it in ftrace_graph_caller of mcount.S.
*/
- /* old = *parent; */
- safe_load_stack(old, parent, faulted);
+ /* old_parent_ra = *parent_ra_addr; */
+ safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
#ifndef KBUILD_MCOUNT_RA_ADDRESS
- parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
- (unsigned long)parent, fp);
+ parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
+ old_parent_ra, (unsigned long)parent_ra_addr, fp);
/*
* If fails when getting the stack address of the non-leaf function's
* ra, stop function graph tracer and return
*/
- if (parent == 0)
+ if (parent_ra_addr == 0)
goto out;
#endif
- /* *parent = return_hooker; */
- safe_store_stack(return_hooker, parent, faulted);
+ /* *parent_ra_addr = return_hooker; */
+ safe_store_stack(return_hooker, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
- if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
- -EBUSY) {
- *parent = old;
+ if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
+ == -EBUSY) {
+ *parent_ra_addr = old_parent_ra;
return;
}
- trace.func = self_addr;
+ /*
+ * Get the recorded ip of the current mcount calling site in the
+ * __mcount_loc section, which will be used to filter the function
+ * entries configured through the tracing/set_graph_function interface.
+ */
+
+ insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+ trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
- *parent = old;
+ *parent_ra_addr = old_parent_ra;
}
return;
out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f703b8..a8244854d3d 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
return ret;
}
-static int mipspmu_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
- int err = 0;
-
- /* To look for a free counter for this event. */
- idx = mipspmu->alloc_counter(cpuc, hwc);
- if (idx < 0) {
- err = idx;
- goto out;
- }
-
- /*
- * If there is an event in the counter we are going to use then
- * make sure it is disabled.
- */
- event->hw.idx = idx;
- mipspmu->disable_event(idx);
- cpuc->events[idx] = event;
-
- /* Set the period for the event. */
- mipspmu_event_set_period(event, hwc, idx);
-
- /* Enable the event. */
- mipspmu->enable_event(hwc, idx);
-
- /* Propagate our changes to the userspace mapping. */
- perf_event_update_userpage(event);
-
-out:
- return err;
-}
-
static void mipspmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
unsigned long flags;
int shift = 64 - TOTAL_BITS;
s64 prev_raw_count, new_raw_count;
- s64 delta;
+ u64 delta;
again:
prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
return;
}
-static void mipspmu_disable(struct perf_event *event)
+static void mipspmu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* Set the period for the event. */
+ mipspmu_event_set_period(event, hwc, hwc->idx);
+
+ /* Enable the event. */
+ mipspmu->enable_event(hwc, hwc->idx);
+}
+
+static void mipspmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* We are working on a local event. */
+ mipspmu->disable_event(hwc->idx);
+ barrier();
+ mipspmu_event_update(event, hwc, hwc->idx);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static int mipspmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int idx;
+ int err = 0;
+ perf_pmu_disable(event->pmu);
- WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+ /* To look for a free counter for this event. */
+ idx = mipspmu->alloc_counter(cpuc, hwc);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
- /* We are working on a local event. */
+ /*
+ * If there is an event in the counter we are going to use then
+ * make sure it is disabled.
+ */
+ event->hw.idx = idx;
mipspmu->disable_event(idx);
+ cpuc->events[idx] = event;
- barrier();
-
- mipspmu_event_update(event, hwc, idx);
- cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ if (flags & PERF_EF_START)
+ mipspmu_start(event, PERF_EF_RELOAD);
+ /* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
+
+out:
+ perf_pmu_enable(event->pmu);
+ return err;
}
-static void mipspmu_unthrottle(struct perf_event *event)
+static void mipspmu_del(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
- mipspmu->enable_event(hwc, hwc->idx);
+ WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+
+ mipspmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[idx] = NULL;
+ clear_bit(idx, cpuc->used_mask);
+
+ perf_event_update_userpage(event);
}
static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
mipspmu_event_update(event, hwc, hwc->idx);
}
-static struct pmu pmu = {
- .enable = mipspmu_enable,
- .disable = mipspmu_disable,
- .unthrottle = mipspmu_unthrottle,
- .read = mipspmu_read,
-};
+static void mipspmu_enable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->start();
+}
+
+static void mipspmu_disable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->stop();
+}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
perf_irq = save_perf_irq;
}
+/*
+ * mipsxx/rm9000/loongson2 have different performance counters, they have
+ * specific low-level init routines.
+ */
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (atomic_dec_and_mutex_lock(&active_events,
+ &pmu_reserve_mutex)) {
+ /*
+ * We must not call the destroy function with interrupts
+ * disabled.
+ */
+ on_each_cpu(reset_counters,
+ (void *)(long)mipspmu->num_counters, 1);
+ mipspmu_free_irq();
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+}
+
+static int mipspmu_event_init(struct perf_event *event)
+{
+ int err = 0;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (!mipspmu || event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ if (!atomic_inc_not_zero(&active_events)) {
+ if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
+ atomic_dec(&active_events);
+ return -ENOSPC;
+ }
+
+ mutex_lock(&pmu_reserve_mutex);
+ if (atomic_read(&active_events) == 0)
+ err = mipspmu_get_irq();
+
+ if (!err)
+ atomic_inc(&active_events);
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+
+ if (err)
+ return err;
+
+ err = __hw_perf_event_init(event);
+ if (err)
+ hw_perf_event_destroy(event);
+
+ return err;
+}
+
+static struct pmu pmu = {
+ .pmu_enable = mipspmu_enable,
+ .pmu_disable = mipspmu_disable,
+ .event_init = mipspmu_event_init,
+ .add = mipspmu_add,
+ .del = mipspmu_del,
+ .start = mipspmu_start,
+ .stop = mipspmu_stop,
+ .read = mipspmu_read,
+};
+
static inline unsigned int
mipspmu_perf_event_encode(const struct mips_perf_event *pev)
{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
{
struct hw_perf_event fake_hwc = event->hw;
- if (event->pmu && event->pmu != &pmu)
- return 0;
+ /* Allow mixed event group. So return 1 to pass validation. */
+ if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+ return 1;
return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
return 0;
}
-/*
- * mipsxx/rm9000/loongson2 have different performance counters, they have
- * specific low-level init routines.
- */
-static void reset_counters(void *arg);
-static int __hw_perf_event_init(struct perf_event *event);
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_and_mutex_lock(&active_events,
- &pmu_reserve_mutex)) {
- /*
- * We must not call the destroy function with interrupts
- * disabled.
- */
- on_each_cpu(reset_counters,
- (void *)(long)mipspmu->num_counters, 1);
- mipspmu_free_irq();
- mutex_unlock(&pmu_reserve_mutex);
- }
-}
-
-const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
- int err = 0;
-
- if (!mipspmu || event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
- return ERR_PTR(-ENODEV);
-
- if (!atomic_inc_not_zero(&active_events)) {
- if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
- atomic_dec(&active_events);
- return ERR_PTR(-ENOSPC);
- }
-
- mutex_lock(&pmu_reserve_mutex);
- if (atomic_read(&active_events) == 0)
- err = mipspmu_get_irq();
-
- if (!err)
- atomic_inc(&active_events);
- mutex_unlock(&pmu_reserve_mutex);
- }
-
- if (err)
- return ERR_PTR(err);
-
- err = __hw_perf_event_init(event);
- if (err)
- hw_perf_event_destroy(event);
-
- return err ? ERR_PTR(err) : &pmu;
-}
-
-void hw_perf_enable(void)
-{
- if (mipspmu)
- mipspmu->start();
-}
-
-void hw_perf_disable(void)
-{
- if (mipspmu)
- mipspmu->stop();
-}
-
/* This is needed by specific irq handlers in perf_event_*.c */
static void
handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
#include "perf_event_mipsxx.c"
/* Callchain handling code. */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
- u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
/*
* Leave userspace callchain empty for now. When we find a way to trace
* the user stack callchains, we add here.
*/
-static void
-perf_callchain_user(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
}
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
while (!kstack_end(sp)) {
addr = *sp++;
if (__kernel_text_address(addr)) {
- callchain_store(entry, addr);
+ perf_callchain_store(entry, addr);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
}
}
}
-static void
-perf_callchain_kernel(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
unsigned long sp = regs->regs[29];
#ifdef CONFIG_KALLSYMS
unsigned long ra = regs->regs[31];
unsigned long pc = regs->cp0_epc;
- callchain_store(entry, PERF_CONTEXT_KERNEL);
if (raw_show_trace || !__kernel_text_address(pc)) {
unsigned long stack_page =
(unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
return;
}
do {
- callchain_store(entry, pc);
+ perf_callchain_store(entry, pc);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);
#else
- callchain_store(entry, PERF_CONTEXT_KERNEL);
save_raw_perf_callchain(entry, sp);
#endif
}
-
-static void
-perf_do_callchain(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
-{
- int is_user;
-
- if (!regs)
- return;
-
- is_user = user_mode(regs);
-
- if (!current || !current->pid)
- return;
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- if (!is_user) {
- perf_callchain_kernel(regs, entry);
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
- if (regs)
- perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
- struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
- entry->nr = 0;
- perf_do_callchain(regs, entry);
- return entry;
-}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d22666..d9a7db78ed6 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
* interrupt, not NMI.
*/
if (handled == IRQ_HANDLED)
- perf_event_do_pending();
+ irq_work_run();
#ifdef CONFIG_MIPS_MT_SMP
read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
"CPU, irq %d%s\n", mipspmu->name, counters, irq,
irq < 0 ? " (share with timer interrupt)" : "");
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
return 0;
}
early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342bca3..dbbe0ce48d8 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
static int protected_restore_fp_context(struct sigcontext __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e052b2..aae98661379 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb4..32a25610108 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
*/
static struct task_struct *cpu_idle_thread[NR_CPUS];
+struct create_idle {
+ struct work_struct work;
+ struct task_struct *idle;
+ struct completion done;
+ int cpu;
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
+{
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
+
+ c_idle->idle = fork_idle(c_idle->cpu);
+ complete(&c_idle->done);
+}
+
int __cpuinit __cpu_up(unsigned int cpu)
{
struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
* Linux can schedule processes on this slave.
*/
if (!cpu_idle_thread[cpu]) {
- idle = fork_idle(cpu);
- cpu_idle_thread[cpu] = idle;
+ /*
+ * Schedule work item to avoid forking user task
+ * Ported from arch/x86/kernel/smpboot.c
+ */
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ };
+
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
+ idle = cpu_idle_thread[cpu] = c_idle.idle;
if (IS_ERR(idle))
panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e..58beabf50b3 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
static int __used noinline
_sys_sysmips(nabi_no_regargs struct pt_regs regs)
{
- long cmd, arg1, arg2, arg3;
+ long cmd, arg1, arg2;
cmd = regs.regs[4];
arg1 = regs.regs[5];
arg2 = regs.regs[6];
- arg3 = regs.regs[7];
switch (cmd) {
case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
if (arg1 & 2)
set_thread_flag(TIF_LOGADE);
else
- clear_thread_flag(TIF_FIXADE);
+ clear_thread_flag(TIF_LOGADE);
return 0;
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fd..ab52b7cf3b6 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77fec7e..aca93eed877 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
+if MACH_LOONGSON
+
choice
prompt "Machine Type"
- depends on MACH_LOONGSON
config LEMOTE_FULOONG2E
bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
config LOONGSON_MC146818
bool
default n
+
+endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06defc4f7..353e1d2e41a 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
strcat(arcs_cmdline, " ");
}
- if ((strstr(arcs_cmdline, "console=")) == NULL)
- strcat(arcs_cmdline, " console=ttyS0,115200");
- if ((strstr(arcs_cmdline, "root=")) == NULL)
- strcat(arcs_cmdline, " root=/dev/hda1");
-
prom_init_machtype();
}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b73f9..2efd5d9dee2 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
void __init prom_init_machtype(void)
{
- char *p, str[MACHTYPE_LEN];
+ char *p, str[MACHTYPE_LEN + 1];
int machtype = MACH_LEMOTE_FL2E;
mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
}
p += strlen("machtype=");
strncpy(str, p, MACHTYPE_LEN);
+ str[MACHTYPE_LEN] = '\0';
p = strstr(str, " ");
if (p)
*p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d950095..2a7d43f4f16 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
#define COMPXSP \
- unsigned xm; int xe; int xs; int xc
+ unsigned xm; int xe; int xs __maybe_unused; int xc
#define COMPYSP \
unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
#define COMPXDP \
-u64 xm; int xe; int xs; int xc
+u64 xm; int xe; int xs __maybe_unused; int xc
#define COMPYDP \
u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd24c82..279599e9a77 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long lastpfn;
+ unsigned long lastpfn __maybe_unused;
pagetable_init();
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d3412d0b..04f9e17db9d 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
static int scratchpad_offset(int i)
{
BUG();
+ /* Really unreachable, but evidently some GCC want this. */
+ return 0;
}
#endif
/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d80c88..68798f869c0 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
* RETURNS: PCIBIOS_SUCCESSFUL - success
*
****************************************************************************/
-static int bpci_interrupt(int irq, void *dev_id)
+static irqreturn_t bpci_interrupt(int irq, void *dev_id)
{
struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
/* write to clear all asserted interrupts */
preg->if_status = stat;
- return PCIBIOS_SUCCESSFUL;
+ return IRQ_HANDLED;
}
/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988bb85..8d798497c61 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
config PMC_MSP4200_EVAL
bool "PMC-Sierra MSP4200 Eval Board"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
config PMC_MSP4200_GW
bool "PMC-Sierra MSP4200 VoIP Gateway"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e15f57..01df84ce31e 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
mips_hpt_frequency = cpu_rate/2;
}
-unsigned int __init get_c0_compare_int(void)
+unsigned int __cpuinit get_c0_compare_int(void)
{
return MSP_INT_VPE0_TIMER;
}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f9298e3..9d773a63951 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
* Atomically reads the value of @v. Note that the guaranteed
* useful range of an atomic_t is only 24 bits.
*/
-#define atomic_read(v) ((v)->counter)
+#define atomic_read(v) (ACCESS_ONCE((v)->counter))
/**
* atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0bbd0..3d6e60dad9d 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
#define __get_user_check(x, ptr, size) \
({ \
+ const __typeof__(ptr) __guc_ptr = (ptr); \
int _e; \
- if (likely(__access_ok((unsigned long) (ptr), (size)))) \
- _e = __get_user_nocheck((x), (ptr), (size)); \
+ if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
+ _e = __get_user_nocheck((x), __guc_ptr, (size)); \
else { \
_e = -EFAULT; \
(x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b..5b955000626 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
unsigned tsc, elapse;
irqreturn_t ret;
- write_seqlock(&xtime_lock);
-
while (tsc = get_cycles(),
elapse = tsc - mn10300_last_tsc, /* time elapsed since last
* tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
mn10300_last_tsc += MN10300_TSC_PER_HZ;
/* advance the kernel's time tracking system */
- do_timer(1);
+ xtime_update(1);
}
- write_sequnlock(&xtime_lock);
-
ret = local_timer_interrupt();
#ifdef CONFIG_SMP
send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a60b2d..a6b63dde603 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
/* invalidate the icache coverage on that region */
mn10300_local_icache_inv_range2(addr + off, size);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
}
/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
* directly */
start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
mn10300_icache_inv_range(start_page, end);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
if (start_page == start)
goto done;
end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b..6ab9580b0b0 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
int16_t f_pad;
};
-static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
+static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- memset(buf, 0, sizeof(*buf));
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid[0] = st.f_fsid.val[0];
- buf->f_fsid[1] = st.f_fsid.val[1];
-
+ struct hpux_statfs buf;
+ memset(&buf, 0, sizeof(buf));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid[0] = st->f_fsid.val[0];
+ buf.f_fsid[1] = st->f_fsid.val[1];
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
asmlinkage long hpux_statfs(const char __user *pathname,
struct hpux_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct hpux_statfs tmp;
- error = do_statfs_hpux(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
{
- struct file *file;
- struct hpux_statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_hpux(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
- out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55e..67a33cc27ef 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,7 +8,7 @@
#include <asm/errno.h>
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
/* Non-atomic version */
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int err = 0;
- int uval;
+ u32 val;
/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
* our gateway page, and causes no end of trouble...
@@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- err = get_user(uval, uaddr);
- if (err) return -EFAULT;
- if (uval == oldval)
- err = put_user(newval, uaddr);
- if (err) return -EFAULT;
- return uval;
+ if (get_user(val, uaddr))
+ return -EFAULT;
+ if (val == oldval && put_user(newval, uaddr))
+ return -EFAULT;
+ *uval = val;
+ return 0;
}
#endif /*__KERNEL__*/
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d..45b7389d77a 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
update_process_times(user_mode(get_irq_regs()));
}
- if (cpu == 0) {
- write_seqlock(&xtime_lock);
- do_timer(ticks_elapsed);
- write_sequnlock(&xtime_lock);
- }
+ if (cpu == 0)
+ xtime_update(ticks_elapsed);
return IRQ_HANDLED;
}
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 7c589ef81fb..c94e4a3fe2e 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -30,7 +30,7 @@
: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
: "cr0", "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev;
+ int ret = 0;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ (
PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\
- cmpw 0,%0,%3\n\
+"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
+ cmpw 0,%1,%4\n\
bne- 3f\n"
- PPC405_ERR77(0,%2)
-"2: stwcx. %4,0,%2\n\
+ PPC405_ERR77(0,%3)
+"2: stwcx. %5,0,%3\n\
bne- 1b\n"
PPC_ACQUIRE_BARRIER
"3: .section .fixup,\"ax\"\n\
-4: li %0,%5\n\
+4: li %0,%6\n\
b 3b\n\
.previous\n\
.section __ex_table,\"a\"\n\
.align 3\n\
" PPC_LONG "1b,4b,2b,4b\n\
.previous" \
- : "=&r" (prev), "+m" (*uaddr)
+ : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
: "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
: "cc", "memory");
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48bacd1..26b8c807f8f 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
//
//----------------------------------------------------------------------------
#include <linux/cache.h>
+#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+/*
+ * We only have to have statically allocated lppaca structs on
+ * legacy iSeries, which supports at most 64 cpus.
+ */
+#ifdef CONFIG_PPC_ISERIES
+#if NR_CPUS < 64
+#define NR_LPPACAS NR_CPUS
+#else
+#define NR_LPPACAS 64
+#endif
+#else /* not iSeries */
+#define NR_LPPACAS 1
+#endif
+
+
/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
* alignment is sufficient to prevent this */
struct lppaca {
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 51e9e6f90d1..edeb80fdd2c 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -171,6 +171,16 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
return bus->sysdata;
}
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ struct pci_controller *host;
+
+ if (bus->self)
+ return pci_device_to_OF_node(bus->self);
+ host = pci_bus_to_host(bus);
+ return host ? host->dn : NULL;
+}
+
static inline int isa_vaddr_is_ioport(void __iomem *address)
{
/* No specific ISA handling on ppc32 at this stage, it
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index d7275758559..c189aa5fe1f 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -70,21 +70,6 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
#endif
#define of_node_to_nid of_node_to_nid
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
extern void of_instantiate_rtc(void);
/* These includes are put at the bottom because they may contain things
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe7..bb1e2cdeb9b 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
* by Paul Mackerras <paulus@samba.org>.
*/
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
/*
* the semaphore definition
*/
@@ -33,47 +28,6 @@
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-struct rw_semaphore {
- long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, \
- __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
- } while (0)
-
/*
* lock for reading
*/
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return sem->count != 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846f3c3..f4adf89d761 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
#ifdef CONFIG_PPC_BOOK3S
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#ifdef CONFIG_PPC_ISERIES
-#if NR_CPUS < 64
-#define NR_LPPACAS NR_CPUS
-#else
-#define NR_LPPACAS 64
-#endif
-#else /* not iSeries */
-#define NR_LPPACAS 1
-#endif
-
-/*
* The structure which the hypervisor knows about - this structure
* should not cross a page boundary. The vpa_init/register_vpa call
* is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 10a44e68ef1..eb341be9a4d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index c2b7a07cc3d..47187cc2cf0 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -2,95 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn) {
- rc = of_irq_map_one(dn, 0, out_irq);
- if (!rc)
- return rc;
- }
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
-#ifdef CONFIG_PPC64
- ppnode = pci_bus_to_OF_node(pdev->bus);
-#else
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
-#endif
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't include
- * the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd481232957..0dc95c0aa3b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
{
int rc = 0;
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ /* Disabled until races with load balancing are fixed */
+ if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
get_lppaca()->shared_proc) {
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86..a3d2ce54ea2 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
if (!IS_ERR(tmp)) {
struct nameidata nd;
- ret = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ ret = kern_path_parent(tmp, &nd);
if (!ret) {
nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384c0c4..f0491cc2890 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
- for (i = 0; i < NR_CPUS; i++) {
- if (lppaca_of(i).dyn_proc_status >= 2)
+ for (i = 0; i < NR_LPPACAS; i++) {
+ if (lppaca[i].dyn_proc_status >= 2)
continue;
snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
dt_prop_str(dt, "device_type", device_type_cpu);
- index = lppaca_of(i).dyn_hv_phys_proc_index;
+ index = lppaca[i].dyn_hv_phys_proc_index;
d = &xIoHriProcessorVpd[index];
dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b0863410517..2946ae10fbf 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
* on but calling this function multiple times is fine.
*/
identify_cpu(0, mfspr(SPRN_PVR));
+ initialise_paca(&boot_paca, 0);
powerpc_firmware_features |= FW_FEATURE_ISERIES;
powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e..81cf36b691f 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
#include <linux/uaccess.h>
#include <asm/errno.h>
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr,
- int oldval, int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval);
+ return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
}
#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322..d0eb4653ceb 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifndef __s390x__
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-
-/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52..2d9ea11f919 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
size_t (*clear_user)(size_t, void __user *);
size_t (*strnlen_user)(size_t, const char __user *);
size_t (*strncpy_from_user)(size_t, const char __user *, char *);
- int (*futex_atomic_op)(int op, int __user *, int oparg, int *old);
- int (*futex_atomic_cmpxchg)(int __user *, int old, int new);
+ int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
+ int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
};
extern struct uaccess_ops uaccess;
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f..1d2536cb630 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
extern size_t copy_to_user_std(size_t, void __user *, const void *);
extern size_t strnlen_user_std(size_t, const char __user *);
extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(int __user *, int, int);
-extern int futex_atomic_op_std(int, int __user *, int, int *);
+extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
+extern int futex_atomic_op_std(int, u32 __user *, int, int *);
extern size_t copy_from_user_pt(size_t, const void __user *, void *);
extern size_t copy_to_user_pt(size_t, void __user *, const void *);
-extern int futex_atomic_op_pt(int, int __user *, int, int *);
-extern int futex_atomic_cmpxchg_pt(int __user *, int, int);
+extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
+extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296d..74833831417 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc" );
-static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
{
int ret;
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
asm volatile("0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
+ "1: la %0,0\n"
"2:\n"
EX_TABLE(0b,2b) EX_TABLE(1b,2b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
+ *uval = oldval;
return ret;
}
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+ return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
spin_lock(&current->mm->page_table_lock);
uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
}
get_page(virt_to_page(uaddr));
spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+ ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
put_page(virt_to_page(uaddr));
return ret;
}
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a..bb1a7eed42c 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc");
-int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
asm volatile(
" sacf 256\n"
"0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
+ "1: la %0,0\n"
"2: sacf 0\n"
EX_TABLE(0b,2b) EX_TABLE(1b,2b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
+ *uval = oldval;
return ret;
}
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index a9f16a7f9ae..6cb9f193a95 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -3,7 +3,7 @@
#include <asm/system.h>
-static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
- int oldval, int newval)
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+ u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
unsigned long flags;
- int ret, prev = 0;
+ int ret;
+ u32 prev = 0;
local_irq_save(flags);
@@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
local_irq_restore(flags);
- if (ret)
- return ret;
-
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __ASM_SH_FUTEX_IRQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 68256ec5fa3..7be39a646fb 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -10,7 +10,7 @@
/* XXX: UP variants, fix for SH-4A and SMP.. */
#include <asm/futex-irq.h>
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval);
+ return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
}
#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e4..edab5726529 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
#endif
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 47f95839dc6..444e7bea23b 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -30,7 +30,7 @@
: "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
: "memory")
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
- if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int))))
+ if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
return -EFAULT;
if (unlikely((((unsigned long) uaddr) & 0x3UL)))
return -EINVAL;
@@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
+ int ret = 0;
+
__asm__ __volatile__(
- "\n1: casa [%3] %%asi, %2, %0\n"
+ "\n1: casa [%4] %%asi, %3, %1\n"
"2:\n"
" .section .fixup,#alloc,#execinstr\n"
" .align 4\n"
"3: sethi %%hi(2b), %0\n"
" jmpl %0 + %%lo(2b), %%g0\n"
- " mov %4, %0\n"
+ " mov %5, %0\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
" .align 4\n"
" .word 1b, 3b\n"
" .previous\n"
- : "=r" (newval)
- : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
+ : "+r" (ret), "=r" (newval)
+ : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
: "memory");
- return newval;
+ *uval = newval;
+ return ret;
}
#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869b..069bf4d663a 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_ACTIVE_MASK 0xffffffffL
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
/*
* lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c65..2cdc131b50a 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
static irqreturn_t pcic_timer_handler (int irq, void *h)
{
- write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
pcic_clear_clock_irq();
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 9c743b1886f..4211bfc9bca 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
profile_tick(CPU_PROFILING);
#endif
- /* Protect counter clear so that do_gettimeoffset works */
- write_seqlock(&xtime_lock);
-
clear_clock_irq();
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffd..d3c7a12ad87 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
- [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+ [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
};
#else /* SMP */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index fe0d10dcae5..d03ec124a59 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -29,16 +29,16 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
-extern struct __get_user futex_set(int __user *v, int i);
-extern struct __get_user futex_add(int __user *v, int n);
-extern struct __get_user futex_or(int __user *v, int n);
-extern struct __get_user futex_andn(int __user *v, int n);
-extern struct __get_user futex_cmpxchg(int __user *v, int o, int n);
+extern struct __get_user futex_set(u32 __user *v, int i);
+extern struct __get_user futex_add(u32 __user *v, int n);
+extern struct __get_user futex_or(u32 __user *v, int n);
+extern struct __get_user futex_andn(u32 __user *v, int n);
+extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
#ifndef __tilegx__
-extern struct __get_user futex_xor(int __user *v, int n);
+extern struct __get_user futex_xor(u32 __user *v, int n);
#else
-static inline struct __get_user futex_xor(int __user *uaddr, int n)
+static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
{
struct __get_user asm_ret = __get_user_4(uaddr);
if (!asm_ret.err) {
@@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n)
}
#endif
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
- int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
struct __get_user asm_ret;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- return asm_ret.err ? asm_ret.err : asm_ret.val;
+ *uval = asm_ret.val;
+ return asm_ret.err;
}
#ifndef __tilegx__
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b433..1e78940218c 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
config MMU
bool
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
index 5ee328099c6..02fb017fed4 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -10,6 +10,8 @@ endmenu
config UML_X86
def_bool y
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_FIND_NEXT_BIT
config 64BIT
bool
@@ -19,6 +21,9 @@ config X86_32
def_bool !64BIT
select HAVE_AOUT
+config X86_64
+ def_bool 64BIT
+
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dc..c70e047eed7 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
#if 0
void mconsole_proc(struct mc_request *req)
{
- struct nameidata nd;
struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
struct file *file;
- int n, err;
+ int n;
char *ptr = req->request.data, *buf;
mm_segment_t old_fs = get_fs();
ptr += strlen("proc");
ptr = skip_spaces(ptr);
- err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd);
- if (err) {
- mconsole_reply(req, "Failed to look up file", 1, 0);
- goto out;
- }
-
- err = may_open(&nd.path, MAY_READ, O_RDONLY);
- if (result) {
- mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
- goto out;
- }
-
- file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
- current_cred());
- err = PTR_ERR(file);
+ file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
goto out;
}
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c..620f5b70957 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.request = NULL, \
.start_sg = 0, \
.end_sg = 0, \
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c96..64cfea80cfe 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
- raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ struct irq_desc *desc = irq_to_desc(i);
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->name);
+ seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
skip:
- raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS)
seq_putc(p, '\n');
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
EXPORT_SYMBOL(reactivate_fd);
/*
- * irq_chip must define (startup || enable) &&
- * (shutdown || disable) && end
+ * irq_chip must define at least enable/disable and ack when
+ * the edge handler is used.
*/
-static void dummy(unsigned int irq)
+static void dummy(struct irq_data *d)
{
}
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
static struct irq_chip normal_irq_type = {
.name = "SIGIO",
.release = free_irq_by_irq_and_dev,
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
static struct irq_chip SIGVTALRM_irq_type = {
.name = "SIGVTALRM",
.release = free_irq_by_irq_and_dev,
- .shutdown = dummy, /* never called */
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d5ed94d30aa..f8958b01b97 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,8 +64,12 @@ config X86
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
select HAVE_SPARSE_IRQ
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_FIND_NEXT_BIT
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
+ select GENERIC_IRQ_SHOW
+ select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
config INSTRUCTION_DECODER
@@ -382,6 +386,8 @@ config X86_INTEL_CE
depends on X86_32
depends on X86_EXTENDED_PLATFORM
select X86_REBOOTFIXUPS
+ select OF
+ select OF_EARLY_FLATTREE
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -811,7 +817,7 @@ config X86_LOCAL_APIC
config X86_IO_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
config X86_VISWS_APIC
def_bool y
@@ -1705,7 +1711,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
- def_bool X86_64
+ def_bool y
depends on NUMA
menu "Power management and ACPI options"
@@ -2066,9 +2072,10 @@ config SCx200HR_TIMER
config OLPC
bool "One Laptop Per Child support"
+ depends on !X86_PAE
select GPIOLIB
- select OLPC_OPENFIRMWARE
- depends on !X86_64 && !X86_PAE
+ select OF
+ select OF_PROMTREE if PROC_DEVICETREE
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2079,21 +2086,6 @@ config OLPC_XO1
---help---
Add support for non-essential features of the OLPC XO-1 laptop.
-config OLPC_OPENFIRMWARE
- bool "Support for OLPC's Open Firmware"
- depends on !X86_64 && !X86_PAE
- default n
- select OF
- help
- This option adds support for the implementation of Open Firmware
- that is used on the OLPC XO-1 Children's Machine.
- If unsure, say N here.
-
-config OLPC_OPENFIRMWARE_DT
- bool
- default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE
- select OF_PROMTREE
-
endif # X86_32
config AMD_NB
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 283c5a6a03a..ed47e6e1747 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,11 +294,6 @@ config X86_GENERIC
endif
-config X86_CPU
- def_bool y
- select GENERIC_FIND_FIRST_BIT
- select GENERIC_FIND_NEXT_BIT
-
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5f..46a82388243 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
if (fseek(f, -4L, SEEK_END)) {
perror(argv[1]);
}
- fread(&olen, sizeof olen, 1, f);
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ return 1;
+ }
+
ilen = ftell(f);
olen = getle32(&olen);
fclose(f);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index f729b2e9679..430312ba6e3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -128,26 +128,20 @@ ENTRY(ia32_sysenter_target)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
- pushq $__USER32_DS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_DS
/*CFI_REL_OFFSET ss,0*/
- pushq %rbp
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
- pushfq
- CFI_ADJUST_CFA_OFFSET 8
+ pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
CFI_REGISTER rip,r10
- pushq $__USER32_CS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
movl %eax, %eax
- pushq %r10
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %r10
CFI_REL_OFFSET rip,0
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
@@ -184,11 +178,9 @@ sysexit_from_sys_call:
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
- popfq
- CFI_ADJUST_CFA_OFFSET -8
+ popfq_cfi
/*CFI_RESTORE rflags*/
- popq %rcx /* User %esp */
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
@@ -423,8 +415,7 @@ ENTRY(ia32_syscall)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
@@ -853,4 +844,7 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad sys_name_to_handle_at
+ .quad compat_sys_open_by_handle_at
+ .quad compat_sys_clock_adjtime
ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4ea15ca89b2..b964ec45754 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -186,15 +186,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
-extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end);
-extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-#ifdef CONFIG_NUMA_EMU
-extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes);
-#endif
+extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f..e264ae5a144 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range {
u8 dev_limit;
};
-extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
-extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int amd_scan_nodes(void);
-
-#ifdef CONFIG_NUMA_EMU
-extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
-extern void amd_get_nodes(struct bootnode *nodes);
-#endif
+extern int amd_numa_init(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, int);
struct amd_northbridge {
struct pci_dev *misc;
+ struct pci_dev *link;
};
struct amd_northbridge_info {
@@ -37,6 +34,7 @@ extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART 0x1
#define AMD_NB_L3_INDEX_DISABLE 0x2
+#define AMD_NB_L3_PARTITIONING 0x4
#ifdef CONFIG_AMD_NB
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c896946f4c..a279d98ea95 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -220,7 +220,6 @@ extern void enable_IR_x2apic(void);
extern int get_physical_broadcast(void);
-extern void apic_disable(void);
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC(void);
@@ -228,7 +227,6 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern int verify_local_APIC(void);
-extern void cache_APIC_registers(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void setup_local_APIC(void);
@@ -239,8 +237,7 @@ void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
-extern void enable_NMI_through_LVT0(void);
-extern int apic_force_enable(void);
+extern int apic_force_enable(unsigned long addr);
/*
* On 32bit this is mach-xxx local
@@ -261,7 +258,6 @@ static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
-static inline void apic_disable(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
#endif /* !CONFIG_X86_LOCAL_APIC */
@@ -307,8 +303,6 @@ struct apic {
void (*setup_apic_routing)(void);
int (*multi_timer_check)(int apic, int irq);
- int (*apicid_to_node)(int logical_apicid);
- int (*cpu_to_logical_apicid)(int cpu);
int (*cpu_present_to_apicid)(int mps_cpu);
void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
void (*setup_portio_remap)(void);
@@ -356,6 +350,23 @@ struct apic {
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Called very early during boot from get_smp_config(). It should
+ * return the logical apicid. x86_[bios]_cpu_to_apicid is
+ * initialized before this function is called.
+ *
+ * If logical apicid can't be determined that early, the function
+ * may return BAD_APICID. Logical apicid will be configured after
+ * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
+ * won't be applied properly during early boot in this case.
+ */
+ int (*x86_32_early_logical_apicid)(int cpu);
+
+ /* determine CPU -> NUMA node mapping */
+ int (*x86_32_numa_cpu_node)(int cpu);
+#endif
};
/*
@@ -503,6 +514,11 @@ extern struct apic apic_noop;
extern struct apic apic_default;
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+ return BAD_APICID;
+}
+
/*
* Set up the logical destination ID.
*
@@ -522,7 +538,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
#endif
@@ -558,12 +574,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
*retmap = *phys_map;
}
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
static inline int __default_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -596,8 +606,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 47a30ff8e51..d87988bacf3 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -426,4 +426,16 @@ struct local_apic {
#else
#define BAD_APICID 0xFFFFu
#endif
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index c8bfe63a06d..e020d88ec02 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -12,6 +12,7 @@
/* setup data types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
/* extensible setup data list node */
struct setup_data {
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 00000000000..e656ad8c0a2
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_CE4100_H_
+#define _ASM_CE4100_H_
+
+int ce4100_pci_init(void);
+
+#endif
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index e99d55d74df..908b96957d8 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -96,7 +96,7 @@ extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
-extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
+extern void parse_e820_ext(struct setup_data *data);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f..1cd6d26a0a8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
BUILD_INTERRUPT3(invalidate_interrupt\idx,
(INVALIDATE_TLB_VECTOR_START)+\idx,
smp_invalidate_interrupt)
+.endif
.endr
#endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 06850a7194e..2c6fc9e6281 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -7,14 +7,12 @@
frame pointer later */
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp,0
movl %esp,%ebp
.endm
.macro ENDFRAME
- popl %ebp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebp
CFI_RESTORE ebp
.endm
#else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e95..d09bb03653f 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
- int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
+ int ret = 0;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
/* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return -ENOSYS;
#endif
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
+ asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
"2:\t.section .fixup, \"ax\"\n"
- "3:\tmov %2, %0\n"
+ "3:\tmov %3, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE(1b, 3b)
- : "=a" (oldval), "+m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "0" (oldval)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "1" (oldval)
: "memory"
);
- return oldval;
+ *uval = oldval;
+ return ret;
}
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e6..bb9efe8706e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
extern void invalidate_interrupt5(void);
extern void invalidate_interrupt6(void);
extern void invalidate_interrupt7(void);
+extern void invalidate_interrupt8(void);
+extern void invalidate_interrupt9(void);
+extern void invalidate_interrupt10(void);
+extern void invalidate_interrupt11(void);
+extern void invalidate_interrupt12(void);
+extern void invalidate_interrupt13(void);
+extern void invalidate_interrupt14(void);
+extern void invalidate_interrupt15(void);
+extern void invalidate_interrupt16(void);
+extern void invalidate_interrupt17(void);
+extern void invalidate_interrupt18(void);
+extern void invalidate_interrupt19(void);
+extern void invalidate_interrupt20(void);
+extern void invalidate_interrupt21(void);
+extern void invalidate_interrupt22(void);
+extern void invalidate_interrupt23(void);
+extern void invalidate_interrupt24(void);
+extern void invalidate_interrupt25(void);
+extern void invalidate_interrupt26(void);
+extern void invalidate_interrupt27(void);
+extern void invalidate_interrupt28(void);
+extern void invalidate_interrupt29(void);
+extern void invalidate_interrupt30(void);
+extern void invalidate_interrupt31(void);
extern void irq_move_cleanup_interrupt(void);
extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 36fb1a6a510..8dbe353e41e 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start,
unsigned long page_size_mask);
-extern unsigned long __initdata e820_table_start;
-extern unsigned long __meminitdata e820_table_end;
-extern unsigned long __meminitdata e820_table_top;
+extern unsigned long __initdata pgt_buf_start;
+extern unsigned long __meminitdata pgt_buf_end;
+extern unsigned long __meminitdata pgt_buf_top;
#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index f327d386d6c..c4bd267dfc5 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -63,17 +63,6 @@ union IO_APIC_reg_03 {
} __attribute__ ((packed)) bits;
};
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
struct IO_APIC_route_entry {
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
@@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry {
index : 15;
} __attribute__ ((packed));
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
#ifdef CONFIG_X86_IO_APIC
/*
@@ -150,11 +143,6 @@ extern int timer_through_8259;
#define io_apic_assign_pci_irqs \
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-extern u8 io_apic_unique_id(u8 id);
-extern int io_apic_get_unique_id(int ioapic, int apic_id);
-extern int io_apic_get_version(int ioapic);
-extern int io_apic_get_redir_entries(int ioapic);
-
struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
@@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);
+int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+
extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void);
extern void mp_save_irq(struct mpc_intsrc *m);
+extern void disable_ioapic_support(void);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; }
struct io_apic_irq_attr;
static inline int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr) { return 0; }
+
+static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+ return NULL;
+}
+
+static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { }
+static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+ return -ENOMEM;
+}
+
+static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { }
+static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+ return -ENOMEM;
+}
+
+static inline void mp_save_irq(struct mpc_intsrc *m) { };
+static inline void disable_ioapic_support(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a6..615fa9061b5 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
/* Avoid include hell */
#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
}
#ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector);
extern void default_send_IPI_mask_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index c704b38c57a..ba870bb6dd8 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -10,9 +10,6 @@
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
-/* Even though we don't support this, supply it to appease OF */
-static inline void irq_dispose_mapping(unsigned int virq) { }
-
static inline int irq_canonicalize(int irq)
{
return ((irq == 2) ? 9 : irq);
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
new file mode 100644
index 00000000000..423bbbddf36
--- /dev/null
+++ b/arch/x86/include/asm/irq_controller.h
@@ -0,0 +1,12 @@
+#ifndef __IRQ_CONTROLLER__
+#define __IRQ_CONTROLLER__
+
+struct irq_domain {
+ int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type);
+ void *priv;
+ struct device_node *controller;
+ struct list_head l;
+};
+
+#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb..6e976ee3b3e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
+#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
@@ -16,8 +17,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... 237 : device interrupts
- * Vectors 238 ... 255 : special interrupts
+ * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
+ * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -96,37 +97,43 @@
#define THRESHOLD_APIC_VECTOR 0xf9
#define REBOOT_VECTOR 0xf8
-/* f0-f7 used for spreading out TLB flushes: */
-#define INVALIDATE_TLB_VECTOR_END 0xf7
-#define INVALIDATE_TLB_VECTOR_START 0xf0
-#define NUM_INVALIDATE_TLB_VECTORS 8
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
/*
* Generic system vector for platform specific use
*/
-#define X86_PLATFORM_IPI_VECTOR 0xed
+#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
-#define IRQ_WORK_VECTOR 0xec
+#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xea
+#define UV_BAU_MESSAGE 0xf5
/*
* Self IPI vector for machine checks
*/
-#define MCE_SELF_VECTOR 0xeb
+#define MCE_SELF_VECTOR 0xf4
/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xe9
+#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/* up to 32 vectors used for spreading out TLB flushes: */
+#if NR_CPUS <= 32
+# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
+#else
+# define NUM_INVALIDATE_TLB_VECTORS (32)
+#endif
+
+#define INVALIDATE_TLB_VECTOR_END (0xee)
+#define INVALIDATE_TLB_VECTOR_START \
+ (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f050..9c7d95f6174 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
#define MAX_IRQ_SOURCES 256
extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
-#define MAX_APICID 256
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d313..3d4dab43c99 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,57 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/topology.h>
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+ __apicid_to_node[apicid] = node;
+}
+#else /* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_X86_32
# include "numa_32.h"
#else
# include "numa_64.h"
#endif
+
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
+#else /* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node) { }
+static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
+static inline void numa_add_cpu(int cpu) { }
+static inline void numa_remove_cpu(int cpu) { }
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+#endif
+
+#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9..c6beed1ef10 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
extern int numa_off;
extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
+
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int cpu);
+#else /* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607..344eb1790b4 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,23 +2,16 @@
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
-#include <asm/apicdef.h>
struct bootnode {
u64 start;
u64 end;
};
-extern int compute_hash_shift(struct bootnode *nodes, int numblks,
- int *nodeids);
-
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-extern void numa_init_array(void);
extern int numa_off;
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
*/
#define NODE_MIN_SIZE (4*1024*1024)
-extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern int __cpuinit numa_cpu_node(int cpu);
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
-static inline void init_cpu_to_node(void) { }
-static inline void numa_set_node(int cpu, int node) { }
-static inline void numa_clear_node(int cpu) { }
-static inline void numa_add_cpu(int cpu, int node) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 641988efe06..c5d3a5abbb9 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -6,7 +6,7 @@
#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
extern bool olpc_ofw_is_installed(void);
@@ -26,19 +26,15 @@ extern void setup_olpc_ofw_pgd(void);
/* check if OFW was detected during boot */
extern bool olpc_ofw_present(void);
-#else /* !CONFIG_OLPC_OPENFIRMWARE */
-
-static inline bool olpc_ofw_is_installed(void) { return false; }
+#else /* !CONFIG_OLPC */
static inline void olpc_ofw_detect(void) { }
static inline void setup_olpc_ofw_pgd(void) { }
-static inline bool olpc_ofw_present(void) { return false; }
-
-#endif /* !CONFIG_OLPC_OPENFIRMWARE */
+#endif /* !CONFIG_OLPC */
-#ifdef CONFIG_OLPC_OPENFIRMWARE_DT
+#ifdef CONFIG_OF_PROMTREE
extern void olpc_dt_build_devicetree(void);
#else
static inline void olpc_dt_build_devicetree(void) { }
-#endif /* CONFIG_OLPC_OPENFIRMWARE_DT */
+#endif
#endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1..bce688d54c1 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PAGE_DEFS_H
#include <linux/const.h>
+#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -45,11 +46,15 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+static inline phys_addr_t get_max_mapped(void)
+{
+ return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+}
+
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8);
+extern void initmem_init(void);
extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa18..4c25ab48257 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index b4ec95f0751..971e0b46446 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1 +1,69 @@
-/* dummy prom.h; here to make linux/of.h's #includes happy */
+/*
+ * Definitions for Device tree / OpenFirmware handling on X86
+ *
+ * based on arch/powerpc/include/asm/prom.h which is
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_X86_PROM_H
+#define _ASM_X86_PROM_H
+#ifndef __ASSEMBLY__
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#include <asm/irq.h>
+#include <asm/atomic.h>
+#include <asm/setup.h>
+#include <asm/irq_controller.h>
+
+#ifdef CONFIG_OF
+extern int of_ioapic;
+extern u64 initial_dtb;
+extern void add_dtb(u64 data);
+extern void x86_add_irq_domains(void);
+void __cpuinit x86_of_pci_init(void);
+void x86_dtb_init(void);
+
+static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+{
+ return pdev ? pdev->dev.of_node : NULL;
+}
+
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ return pci_device_to_OF_node(bus->self);
+}
+
+#else
+static inline void add_dtb(u64 data) { }
+static inline void x86_add_irq_domains(void) { }
+static inline void x86_of_pci_init(void) { }
+static inline void x86_dtb_init(void) { }
+#define of_ioapic 0
+#endif
+
+extern char cmd_line[COMMAND_LINE_SIZE];
+
+#define pci_address_to_pio pci_address_to_pio
+unsigned long pci_address_to_pio(phys_addr_t addr);
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ *
+ * FIXME: We really should implement proper virq handling like power,
+ * but that's going to be major surgery.
+ */
+static inline void irq_dispose_mapping(unsigned int virq) { }
+
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b6..df4cd32b4cc 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c1bbfa89a0e..73b11bc0ae6 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -33,6 +33,8 @@ static inline bool cpu_has_ht_siblings(void)
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -46,8 +48,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea878..12569e691ce 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
*/
#define HAVE_DISABLE_HLT
#else
-#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e5..910a7084f7f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
- return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#endif /* CONFIG_X86_64 */
-
/* Mappings between node number and cpus on that node. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[];
.balance_interval = 1, \
}
-#ifdef CONFIG_X86_64_ACPI_NUMA
+#ifdef CONFIG_X86_64
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0..ffaf183c619 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,13 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_name_to_handle_at 341
+#define __NR_open_by_handle_at 342
+#define __NR_clock_adjtime 343
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 344
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715..5466bea670e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 303
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 304
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+#define __NR_clock_adjtime 305
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a43..3e094af443c 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
- unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
+ unsigned int base_dest_nodeid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 64642ad019f..643ebf2e2ad 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -83,11 +83,13 @@ struct x86_init_paging {
* boot cpu
* @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
+ * @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
void (*tsc_pre_init)(void);
void (*timer_init)(void);
+ void (*wallclock_init)(void);
};
/**
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025..8508bfe5229 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
#endif
static inline int
-HYPERVISOR_suspend(unsigned long srec)
+HYPERVISOR_suspend(unsigned long start_info_mfn)
{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+
+ /*
+ * For a PV guest the tools require that the start_info mfn be
+ * present in rdx/edx when the hypercall is made. Per the
+ * hypercall calling convention this is the third hypercall
+ * argument, which is start_info_mfn here.
+ */
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}
static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a3..c61934fbf22 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
-#define FOREIGN_FRAME_BIT (1UL<<31)
+#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
/* Maximum amount of memory we can handle in a domain in pages */
#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page);
extern int m2p_remove_override(struct page *page);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
mfn = get_phys_to_machine(pfn);
if (mfn != INVALID_P2M_ENTRY)
- mfn &= ~FOREIGN_FRAME_BIT;
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
return mfn;
}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ int ret = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
+ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ pfn = ~0;
+ goto try_override;
+ }
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- __get_user(pfn, &machine_to_phys_mapping[mfn]);
-
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+ /* ret might be < 0 if there are no entries in the m2p for mfn */
+ if (ret < 0)
+ pfn = ~0;
+ else if (get_phys_to_machine(pfn) != mfn)
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+ * table to see if there's a better pfn to use.
+ *
+ * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+ */
+ pfn = m2p_find_override_pfn(mfn, ~0);
+
+ /*
+ * pfn is ~0 if there are no entries in the m2p for mfn or if the
+ * entry doesn't map back to the mfn and m2p_override doesn't have a
+ * valid entry for it.
*/
- if (get_phys_to_machine(pfn) != mfn)
- pfn = m2p_find_override_pfn(mfn, pfn);
+ if (pfn == ~0 &&
+ get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ pfn = mfn;
return pfn;
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d..aa862098916 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
* its own functions.
*/
struct xen_pci_frontend_ops {
- int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ int (*enable_msi)(struct pci_dev *dev, int vectors[]);
void (*disable_msi)(struct pci_dev *dev);
- int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
void (*disable_msix)(struct pci_dev *dev);
};
extern struct xen_pci_frontend_ops *xen_pci_frontend;
static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
- int **vectors)
+ int vectors[])
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
xen_pci_frontend->disable_msi(dev);
}
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
- int **vectors, int nvec)
+ int vectors[], int nvec)
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd88..62445ba2f8a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,9 +66,9 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
+obj-$(CONFIG_SMP) += tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
@@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+obj-$(CONFIG_OF) += devicetree.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3e6e2d68f76..9a966c579af 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -595,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
nid = acpi_get_node(handle);
if (nid == -1 || !node_online(nid))
return;
-#ifdef CONFIG_X86_64
- apicid_to_node[physid] = nid;
+ set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
- apicid_2_node[physid] = nid;
- cpu_to_node_map[cpu] = nid;
-#endif
-
#endif
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc..ed3c2e5b714 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
static u32 *flush_words;
-struct pci_device_id amd_nb_misc_ids[] = {
+const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+static struct pci_device_id amd_nb_link_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+ {}
+};
+
const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ 0x00, 0x18, 0x20 },
{ 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
static struct pci_dev *next_northbridge(struct pci_dev *dev,
- struct pci_device_id *ids)
+ const struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
{
int i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc;
+ struct pci_dev *misc, *link;
if (amd_nb_num())
return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- misc = NULL;
+ link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
+ node_to_amd_nb(i)->link = link =
+ next_northbridge(link, amd_nb_link_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
+ /* L3 cache partitioning is supported on family 0x15 */
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
+
return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
they're useless anyways */
int __init early_is_amd_nb(u32 device)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
u32 vendor = device & 0xffff;
+
device >>= 16;
for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
return 0;
}
+int amd_get_subcaches(int cpu)
+{
+ struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+ unsigned int mask;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return 0;
+
+ pci_read_config_dword(link, 0x1d4, &mask);
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ return (mask >> (4 * cuid)) & 0xf;
+}
+
+int amd_set_subcaches(int cpu, int mask)
+{
+ static unsigned int reset, ban;
+ struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ unsigned int reg;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
+ return -EINVAL;
+
+ /* if necessary, collect reset state of L3 partitioning and BAN mode */
+ if (reset == 0) {
+ pci_read_config_dword(nb->link, 0x1d4, &reset);
+ pci_read_config_dword(nb->misc, 0x1b8, &ban);
+ ban &= 0x180000;
+ }
+
+ /* deactivate BAN mode if any subcaches are to be disabled */
+ if (mask != 0xf) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
+ }
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ mask <<= 4 * cuid;
+ mask |= (0xf ^ (1 << cuid)) << 26;
+
+ pci_write_config_dword(nb->link, 0x1d4, mask);
+
+ /* reset BAN mode if L3 partitioning returned to reset state */
+ pci_read_config_dword(nb->link, 0x1d4, &reg);
+ if (reg == reset) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ reg &= ~0x180000;
+ pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
+ }
+
+ return 0;
+}
+
int amd_cache_gart(void)
{
int i;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51d4e166306..1293c709ee8 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -508,64 +508,12 @@ static int apbt_next_event(unsigned long delta,
return 0;
}
-/*
- * APB timer clock is not in sync with pclk on Langwell, which translates to
- * unreliable read value caused by sampling error. the error does not add up
- * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
- * would go backwards. the following code is trying to prevent time traveling
- * backwards. little bit paranoid.
- */
static cycle_t apbt_read_clocksource(struct clocksource *cs)
{
- unsigned long t0, t1, t2;
- static unsigned long last_read;
-
-bad_count:
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if (unlikely(t1 < t2)) {
- pr_debug("APBT: read current count error %lx:%lx:%lx\n",
- t1, t2, t2 - t1);
- goto bad_count;
- }
- /*
- * check against cached last read, makes sure time does not go back.
- * it could be a normal rollover but we will do tripple check anyway
- */
- if (unlikely(t2 > last_read)) {
- /* check if we have a normal rollover */
- unsigned long raw_intr_status =
- apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
- /*
- * cs timer interrupt is masked but raw intr bit is set if
- * rollover occurs. then we read EOI reg to clear it.
- */
- if (raw_intr_status & (1 << phy_cs_timer_id)) {
- apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
- goto out;
- }
- pr_debug("APB CS going back %lx:%lx:%lx ",
- t2, last_read, t2 - last_read);
-bad_count_x3:
- pr_debug("triple check enforced\n");
- t0 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if ((t2 > t1) || (t1 > t0)) {
- printk(KERN_ERR "Error: APB CS tripple check failed\n");
- goto bad_count_x3;
- }
- }
-out:
- last_read = t2;
- return (cycle_t)~t2;
+ unsigned long current_count;
+
+ current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE);
+ return (cycle_t)~current_count;
}
static int apbt_clocksource_register(void)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a9..7b1e8e10b89 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
u32 aper_size;
- void *p;
+ unsigned long addr;
/* aper_size should <= 1G */
if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
* so don't use 512M below as gart iommu, leave the space for kernel
* code for safe
*/
- p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
+ if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+ printk(KERN_ERR
+ "Cannot allocate aperture memory hole (%lx,%uK)\n",
+ addr, aper_size>>10);
+ return 0;
+ }
+ memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
- kmemleak_ignore(p);
- if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk(KERN_ERR
- "Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
- if (p)
- free_bootmem(__pa(p), aper_size);
- return 0;
- }
+ kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
- insert_aperture_resource((u32)__pa(p), aper_size);
- register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
- (u32)__pa(p+aper_size) >> PAGE_SHIFT);
+ aper_size >> 10, addr);
+ insert_aperture_resource((u32)addr, aper_size);
+ register_nosave_region(addr >> PAGE_SHIFT,
+ (addr+aper_size) >> PAGE_SHIFT);
- return (u32)__pa(p);
+ return (u32)addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978..966673f4414 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -43,6 +43,7 @@
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/idle.h>
@@ -78,12 +79,21 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
-static int force_enable_local_apic;
+static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
@@ -153,7 +163,7 @@ early_param("nox2apic", setup_nox2apic);
unsigned long mp_lapic_addr;
int disable_apic;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
+static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -177,29 +187,8 @@ static struct resource lapic_resource = {
static unsigned int calibration_result;
-static int lapic_next_event(unsigned long delta,
- struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
static void apic_pm_activate(void);
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
- .name = "lapic",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
- | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
- .shift = 32,
- .set_mode = lapic_timer_setup,
- .set_next_event = lapic_next_event,
- .broadcast = lapic_timer_broadcast,
- .rating = 100,
- .irq = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
static unsigned long apic_phys;
/*
@@ -238,7 +227,7 @@ static int modern_apic(void)
* right after this call apic become NOOP driven
* so apic->write/read doesn't do anything
*/
-void apic_disable(void)
+static void __init apic_disable(void)
{
pr_info("APIC: switched to apic NOOP\n");
apic = &apic_noop;
@@ -282,23 +271,6 @@ u64 native_apic_icr_read(void)
return icr1 | ((u64)icr2 << 32);
}
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
- unsigned int v;
-
- /* unmask and set to NMI */
- v = APIC_DM_NMI;
-
- /* Level triggered for 82489DX (32bit mode) */
- if (!lapic_is_integrated())
- v |= APIC_LVT_LEVEL_TRIGGER;
-
- apic_write(APIC_LVT0, v);
-}
-
#ifdef CONFIG_X86_32
/**
* get_physical_broadcast - Get number of physical broadcast IDs
@@ -508,6 +480,23 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
#endif
}
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+ .name = "lapic",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+ | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+ .broadcast = lapic_timer_broadcast,
+ .rating = 100,
+ .irq = -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
/*
* Setup the local APIC timer for this CPU. Copy the initialized values
* of the boot CPU and register the clock event in the framework.
@@ -1209,7 +1198,7 @@ void __cpuinit setup_local_APIC(void)
rdtscll(tsc);
if (disable_apic) {
- arch_disable_smp_support();
+ disable_ioapic_support();
return;
}
@@ -1237,6 +1226,19 @@ void __cpuinit setup_local_APIC(void)
*/
apic->init_apic_ldr();
+#ifdef CONFIG_X86_32
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches the
+ * actual value.
+ */
+ i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ /* always use the value from LDR */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ logical_smp_processor_id();
+#endif
+
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
@@ -1448,7 +1450,7 @@ int __init enable_IR(void)
void __init enable_IR_x2apic(void)
{
unsigned long flags;
- struct IO_APIC_route_entry **ioapic_entries = NULL;
+ struct IO_APIC_route_entry **ioapic_entries;
int ret, x2apic_enabled = 0;
int dmar_table_init_ret;
@@ -1537,7 +1539,7 @@ static int __init detect_init_APIC(void)
}
#else
-static int apic_verify(void)
+static int __init apic_verify(void)
{
u32 features, h, l;
@@ -1562,7 +1564,7 @@ static int apic_verify(void)
return 0;
}
-int apic_force_enable(void)
+int __init apic_force_enable(unsigned long addr)
{
u32 h, l;
@@ -1578,7 +1580,7 @@ int apic_force_enable(void)
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | addr;
wrmsr(MSR_IA32_APICBASE, l, h);
enabled_via_apicbase = 1;
}
@@ -1619,7 +1621,7 @@ static int __init detect_init_APIC(void)
"you can enable it with \"lapic\"\n");
return -1;
}
- if (apic_force_enable())
+ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
return -1;
} else {
if (apic_verify())
@@ -1930,17 +1932,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- /*
- * Validate version
- */
- if (version == 0x0) {
- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
if (num_processors >= nr_cpu_ids) {
int max = nr_cpu_ids;
int thiscpu = max + disabled_cpus;
@@ -1954,22 +1945,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
num_processors++;
- cpu = cpumask_next_zero(-1, cpu_present_mask);
-
- if (version != apic_version[boot_cpu_physical_apicid])
- WARN_ONCE(1,
- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
-
- physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
*/
cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
}
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
@@ -1977,7 +1980,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
-
+#ifdef CONFIG_X86_32
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ apic->x86_32_early_logical_apicid(cpu);
+#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -1998,10 +2004,14 @@ void default_init_apic_ldr(void)
}
#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
#else
return 0;
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c..5652d31fe10 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f..f1baa2dc087 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
return 0;
}
-static int noop_cpu_to_logical_apicid(int cpu)
-{
- return 0;
-}
-
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-int noop_apicid_to_node(int logical_apicid)
-{
- /* we're always on node 0 */
- return 0;
-}
-
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+#endif
+
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = noop_apicid_to_node,
- .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
@@ -197,4 +192,9 @@ struct apic apic_noop = {
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
.safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
+#endif
};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b..541a2e43165 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
return 1;
}
+static int bigsmp_early_logical_apicid(int cpu)
+{
+ /* on bigsmp, logical apicid is the same as physical */
+ return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
static inline unsigned long calculate_ldr(int cpu)
{
unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
nr_ioapics);
}
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
- return apicid_2_node[hard_smp_processor_id()];
-}
-
static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_physical_id(cpu);
-}
-
static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
/* As we are using single CPU as destination, pick only one CPU here */
static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+ int cpu = cpumask_first(cpumask);
+
+ if (cpu < nr_cpu_ids)
+ return cpu_physical_id(cpu);
+ return BAD_APICID;
}
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
*/
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
+ return cpu_physical_id(cpu);
}
- return bigsmp_cpu_to_logical_apicid(cpu);
+ return BAD_APICID;
}
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = bigsmp_apicid_to_node,
- .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d802..3e9de4854c5 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
+static int es7000_early_logical_apicid(int cpu)
+{
+ /* on es7000, logical apicid is the same as physical */
+ return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
static unsigned long calculate_ldr(int cpu)
{
unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
-
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
++cpu_id;
}
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = es7000_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f9..c4e557a1ebb 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
arch_spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
- dump_stack();
arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return NOTIFY_STOP;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ca9e2a3545a..4b5ebd26f56 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
int skip_ioapic_setup;
-void arch_disable_smp_support(void)
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+void disable_ioapic_support(void)
{
#ifdef CONFIG_PCI
noioapicquirk = 1;
@@ -120,11 +123,14 @@ void arch_disable_smp_support(void)
static int __init parse_noapic(char *str)
{
/* disable IO-APIC */
- arch_disable_smp_support();
+ disable_ioapic_support();
return 0;
}
early_param("noapic", parse_noapic);
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr);
+
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
@@ -181,7 +187,7 @@ int __init arch_early_irq_init(void)
irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
for (i = 0; i < count; i++) {
- set_irq_chip_data(i, &cfg[i]);
+ irq_set_chip_data(i, &cfg[i]);
zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
@@ -200,7 +206,7 @@ int __init arch_early_irq_init(void)
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return get_irq_chip_data(irq);
+ return irq_get_chip_data(irq);
}
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -226,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
if (!cfg)
return;
- set_irq_chip_data(at, NULL);
+ irq_set_chip_data(at, NULL);
free_cpumask_var(cfg->domain);
free_cpumask_var(cfg->old_domain);
kfree(cfg);
@@ -256,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = get_irq_chip_data(at);
+ cfg = irq_get_chip_data(at);
if (cfg)
return cfg;
}
cfg = alloc_irq_cfg(at, node);
if (cfg)
- set_irq_chip_data(at, cfg);
+ irq_set_chip_data(at, cfg);
else
irq_free_desc(at);
return cfg;
@@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq)
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-static int MPBIOS_polarity(int idx)
+static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
int polarity;
@@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx)
return polarity;
}
-static int MPBIOS_trigger(int idx)
+static int irq_trigger(int idx)
{
int bus = mp_irqs[idx].srcbus;
int trigger;
@@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx)
return trigger;
}
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
static int pin_2_irq(int idx, int apic, int pin)
{
int irq;
@@ -1189,7 +1185,7 @@ void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
/*
@@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu)
static struct irq_chip ioapic_chip;
static struct irq_chip ir_ioapic_chip;
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
{
@@ -1248,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq)
}
#endif
-static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+ unsigned long trigger)
{
+ struct irq_chip *chip = &ioapic_chip;
+ irq_flow_handler_t hdl;
+ bool fasteoi;
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
+ trigger == IOAPIC_LEVEL) {
irq_set_status_flags(irq, IRQ_LEVEL);
- else
+ fasteoi = true;
+ } else {
irq_clear_status_flags(irq, IRQ_LEVEL);
+ fasteoi = false;
+ }
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (trigger)
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_edge_irq, "edge");
- return;
+ chip = &ir_ioapic_chip;
+ fasteoi = trigger != 0;
}
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_edge_irq, "edge");
+ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+ irq_set_chip_and_handler_name(irq, chip, hdl,
+ fasteoi ? "fasteoi" : "edge");
}
static int setup_ioapic_entry(int apic_id, int irq,
@@ -1374,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
return;
}
- ioapic_register_intr(irq, trigger);
+ ioapic_register_intr(irq, cfg, trigger);
if (irq < legacy_pic->nr_legacy_irqs)
legacy_pic->mask(irq);
@@ -1385,33 +1373,26 @@ static struct {
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
-static void __init setup_IO_APIC_irqs(void)
+static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
{
- int apic_id, pin, idx, irq, notcon = 0;
- int node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ if (idx != -1)
+ return false;
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int apic_id)
+{
+ int idx, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+ unsigned int pin, irq;
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
+ if (io_apic_pin_not_connected(idx, apic_id, pin))
continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
irq = pin_2_irq(idx, apic_id, pin);
@@ -1423,25 +1404,24 @@ static void __init setup_IO_APIC_irqs(void)
* installed and if it returns 1:
*/
if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
+ apic->multi_timer_check(apic_id, irq))
continue;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- continue;
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- add_pin_to_irq_node(cfg, node, apic_id, pin);
- /*
- * don't mark it in pin_programmed, so later acpi could
- * set it correctly when irq < 16
- */
- setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
- irq_polarity(idx));
+ io_apic_setup_irq_pin(irq, node, &attr);
}
+}
- if (notcon)
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int apic_id;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+ __io_apic_setup_irqs(apic_id);
}
/*
@@ -1452,7 +1432,7 @@ static void __init setup_IO_APIC_irqs(void)
void setup_IO_APIC_irq_extra(u32 gsi)
{
int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1472,21 +1452,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return;
-
- add_pin_to_irq_node(cfg, node, apic_id, pin);
-
- if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[apic_id].apicid, pin);
- return;
- }
- set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- setup_ioapic_irq(apic_id, pin, irq, cfg,
- irq_trigger(idx), irq_polarity(idx));
+ io_apic_setup_irq_pin_once(irq, node, &attr);
}
/*
@@ -1518,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
* The timer IRQ doesn't have to know that behind the
* scene we may have a 8259A-master in AEOI mode ...
*/
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
/*
* Add it to the IO-APIC irq-routing table:
@@ -1625,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void)
for_each_active_irq(irq) {
struct irq_pin_list *entry;
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -2391,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
void irq_force_complete_move(int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
if (!cfg)
return;
@@ -2405,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { }
static void ack_apic_edge(struct irq_data *data)
{
irq_complete_move(data->chip_data);
- move_native_irq(data->irq);
+ irq_move_irq(data);
ack_APIC_irq();
}
@@ -2462,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data)
irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irqd_is_setaffinity_pending(data))) {
do_unmask_irq = 1;
mask_ioapic(cfg);
}
@@ -2551,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data)
* and you can go talk to the chipset vendor about it.
*/
if (!io_apic_level_ack_pending(cfg))
- move_masked_irq(irq);
+ irq_move_masked_irq(data);
unmask_ioapic(cfg);
}
}
@@ -2614,7 +2584,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for_each_active_irq(irq) {
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
@@ -2625,7 +2595,7 @@ static inline void init_IO_APIC_traps(void)
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
- set_irq_chip(irq, &no_irq_chip);
+ irq_set_chip(irq, &no_irq_chip);
}
}
}
@@ -2665,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = {
static void lapic_register_intr(int irq)
{
irq_clear_status_flags(irq, IRQ_LEVEL);
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -2749,7 +2719,7 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = get_irq_chip_data(0);
+ struct irq_cfg *cfg = irq_get_chip_data(0);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -3060,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node)
raw_spin_unlock_irqrestore(&vector_lock, flags);
if (ret) {
- set_irq_chip_data(irq, cfg);
+ irq_set_chip_data(irq, cfg);
irq_clear_status_flags(irq, IRQ_NOREQUEST);
} else {
free_irq_at(irq, cfg);
@@ -3085,7 +3055,7 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long flags;
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3119,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
struct irte irte;
int ir_index;
u16 sub_handle;
@@ -3291,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
+ struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
int ret;
@@ -3298,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
if (ret < 0)
return ret;
- set_irq_msi(irq, msidesc);
+ irq_set_msi_desc(irq, msidesc);
write_msi_msg(irq, &msg);
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(irq_get_chip_data(irq))) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
- } else
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+ chip = &msi_ir_chip;
+ }
+
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3423,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq)
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
return 0;
}
#endif
@@ -3482,6 +3454,7 @@ static struct irq_chip hpet_msi_type = {
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
+ struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
@@ -3501,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
if (ret < 0)
return ret;
- hpet_msi_write(get_irq_data(irq), &msg);
+ hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(get_irq_chip_data(irq)))
- set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
- handle_edge_irq, "edge");
- else
- set_irq_chip_and_handler_name(irq, &hpet_msi_type,
- handle_edge_irq, "edge");
+ if (irq_remapped(irq_get_chip_data(irq)))
+ chip = &ir_hpet_msi_type;
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
}
#endif
@@ -3596,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
write_ht_irq_msg(irq, &msg);
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ irq_set_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3605,7 +3575,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
-int __init io_apic_get_redir_entries (int ioapic)
+int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ int ret;
+
+ if (!cfg)
+ return -EINVAL;
+ ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+ if (!ret)
+ setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
+ attr->trigger, attr->polarity);
+ return ret;
+}
+
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr)
+{
+ unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+ int ret;
+
+ /* Avoid redundant programming */
+ if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[id].apicid, pin);
+ return 0;
+ }
+ ret = io_apic_setup_irq_pin(irq, node, attr);
+ if (!ret)
+ set_bit(pin, mp_ioapic_routing[id].pin_programmed);
+ return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3659,96 +3662,24 @@ int __init arch_probe_nr_irqs(void)
}
#endif
-static int __io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
{
- struct irq_cfg *cfg;
int node;
- int ioapic, pin;
- int trigger, polarity;
- ioapic = irq_attr->ioapic;
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
+ irq_attr->ioapic);
return -EINVAL;
}
- if (dev)
- node = dev_to_node(dev);
- else
- node = cpu_to_node(0);
-
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return 0;
-
- pin = irq_attr->ioapic_pin;
- trigger = irq_attr->trigger;
- polarity = irq_attr->polarity;
+ node = dev ? dev_to_node(dev) : cpu_to_node(0);
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= legacy_pic->nr_legacy_irqs) {
- if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
- printk(KERN_INFO "can not add pin %d for irq %d\n",
- pin, irq);
- return 0;
- }
- }
-
- setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
-
- return 0;
+ return io_apic_setup_irq_pin_once(irq, node, irq_attr);
}
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int ioapic, pin;
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- ioapic = irq_attr->ioapic;
- pin = irq_attr->ioapic_pin;
- if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[ioapic].apicid, pin);
- return 0;
- }
- set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
- return __io_apic_set_pci_routing(dev, irq, irq_attr);
-}
-
-u8 __init io_apic_unique_id(u8 id)
-{
#ifdef CONFIG_X86_32
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return io_apic_get_unique_id(nr_ioapics, id);
- else
- return id;
-#else
- int i;
- DECLARE_BITMAP(used, 256);
-
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-#endif
-}
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3821,9 +3752,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
return apic_id;
}
+
+static u8 __init io_apic_unique_id(u8 id)
+{
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
#endif
-int __init io_apic_get_version(int ioapic)
+static int __init io_apic_get_version(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3868,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
- struct irq_desc *desc;
const struct cpumask *mask;
+ struct irq_data *idata;
if (skip_ioapic_setup == 1)
return;
@@ -3884,21 +3839,20 @@ void __init setup_ioapic_dest(void)
if ((ioapic > 0) && (irq > 16))
continue;
- desc = irq_to_desc(irq);
+ idata = irq_get_irq_data(irq);
/*
* Honour affinities which have been set in early boot
*/
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->irq_data.affinity;
+ if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+ mask = idata->affinity;
else
mask = apic->target_cpus();
if (intr_remapping_enabled)
- ir_ioapic_set_affinity(&desc->irq_data, mask, false);
+ ir_ioapic_set_affinity(idata, mask, false);
else
- ioapic_set_affinity(&desc->irq_data, mask, false);
+ ioapic_set_affinity(idata, mask, false);
}
}
@@ -4026,7 +3980,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
return gsi - mp_gsi_routing[ioapic].gsi_base;
}
-static int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
@@ -4086,20 +4040,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
physid_set_mask_of_physid(boot_cpu_physical_apicid,
&phys_cpu_present_map);
#endif
- /* Make sure the irq descriptor is set up */
- cfg = alloc_irq_and_cfg_at(0, 0);
-
setup_local_APIC();
- add_pin_to_irq_node(cfg, 0, 0, 0);
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
- setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
+ io_apic_setup_irq_pin(0, 0, &attr);
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6..cce91bf2667 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+#ifdef CONFIG_X86_32
+
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector)
{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
local_irq_restore(flags);
}
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
if (query_cpu == this_cpu)
continue;
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
}
local_irq_restore(flags);
}
-#ifdef CONFIG_X86_32
-
/*
* This is only used on smaller machines.
*/
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9..6273eee5134 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
return physids_promote(0xFUL, retmap);
}
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-}
-
/*
* Supporting over 60 cpus on NUMA-Q requires a locality-dependent
* cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
+static int numaq_numa_cpu_node(int cpu)
+{
+ int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (logical_apicid != BAD_APICID)
+ return numaq_apicid_to_node(logical_apicid);
+ return NUMA_NO_NODE;
+}
+
static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
.setup_apic_routing = numaq_setup_apic_routing,
.multi_timer_check = numaq_multi_timer_check,
- .apicid_to_node = numaq_apicid_to_node,
- .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
.cpu_present_to_apicid = numaq_cpu_present_to_apicid,
.apicid_to_cpu_present = numaq_apicid_to_cpu_present,
.setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = numaq_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe01608..fc84c7b6110 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
apic->setup_apic_routing();
}
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
static void setup_apic_flat_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
.multi_timer_check = NULL,
- .apicid_to_node = default_apicid_to_node,
- .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90..e4b8059b414 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
return 1;
}
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
{
- unsigned long val, id;
int count = 0;
- u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
u8 my_cluster = APIC_CLUSTER(my_id);
#ifdef CONFIG_SMP
u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = cpu_2_logical_apicid[i];
+ lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
++count;
}
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- id = my_cluster | (1UL << count);
+ return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ unsigned long val;
+
apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
nr_ioapics);
}
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static int summit_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = summit_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = summit_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
.setup_apic_routing = summit_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = summit_apicid_to_node,
- .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
.cpu_present_to_apicid = summit_cpu_present_to_apicid,
.apicid_to_cpu_present = summit_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = summit_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f491..90949bbd566 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ce..c7e6d6645bf 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b885..3c289281394 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f4..4f13fafc526 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/crypto.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/sigframe.h>
+#include <asm/bootparam.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xen.h>
+#endif
+
#ifdef CONFIG_X86_32
# include "asm-offsets_32.c"
#else
# include "asm-offsets_64.c"
#endif
+
+void common(void) {
+ BLANK();
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..c29d631af6f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include <asm/sigframe.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/bootparam.h>
-#include <asm/elf.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, desc_ptr, size);
- OFFSET(GDS_address, desc_ptr, address);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
- DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
-
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
-#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
-#endif
-
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#endif
-
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
-
- BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..e72a1194af2 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
#include <asm/ia32.h>
-#include <asm/bootparam.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
-
-#include <asm/sigframe.h>
#define __NO_STUBS 1
#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
int main(void)
{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
-#ifdef CONFIG_IA32_EMULATION
- ENTRY(sysenter_return);
-#endif
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ BLANK();
#endif
-
#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ BLANK();
+
+#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
ENTRY(ip);
BLANK();
#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
+
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
-#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+
+#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(bx);
ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
ENTRY(flags);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+
+#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
ENTRY(cr0);
ENTRY(cr2);
ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
- BLANK();
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#undef ENTRY
-#endif
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
return 0;
}
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a38917951..452932d3473 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
addr += size;
}
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
+ if (num_scan_areas)
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
}
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
{
check_for_bios_corruption();
schedule_delayed_work(&bios_check_work,
- round_jiffies_relative(corruption_check_period*HZ));
+ round_jiffies_relative(corruption_check_period*HZ));
}
static int start_periodic_check_for_corruption(void)
{
- if (!memory_corruption_check || corruption_check_period == 0)
+ if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
return 0;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5..f771ab6b49e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
#endif
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
static int __cpuinit nearby_node(int apicid)
{
int i, node;
for (i = apicid - 1; i >= 0; i--) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes;
+ u32 nodes, cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
+ cores_per_cu += ((ebx >> 8) & 3);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
+ u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
+ cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
- /* core id to be in range from 0 to (cores_per_node - 1) */
- c->cpu_core_id = c->cpu_core_id % cores_per_node;
+ /* core id has to be in the [0 .. cores_per_node - 1] range */
+ c->cpu_core_id %= cores_per_node;
+ c->compute_unit_id %= cus_per_node;
}
}
#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
int cpu = smp_processor_id();
int node;
unsigned apicid = c->apicid;
- node = per_cpu(cpu_llc_id, cpu);
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
-
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs
+ * which the K8 northbridge parsing fills in. Assume
+ * they are all increased by a constant offset, but in
+ * the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
int ht_nodeid = c->initial_apicid;
if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
+ __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
/* Pick a nearby node */
if (!node_online(node))
node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..e2ced0074a4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
#endif
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
cpu_devs[count] = cpudev;
count++;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
{
unsigned int j;
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
select_idle_routine(c);
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f279..4a5a42b842a 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
cmd_incomplete:
iowrite16(0, &pcch_hdr->status);
spin_unlock(&pcc_lock);
- return -EINVAL;
+ return 0;
}
static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6b..df86bc8c859 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
unsigned node;
int cpu = smp_processor_id();
- int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
- node = apicid_to_node[apicid];
+ node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE || !node_online(node)) {
/* reuse the value from init_cpu_to_node() */
node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8e..1ce1af2899d 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
struct _cache_attr {
struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+ ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
+ unsigned int);
};
#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
#define SHOW_CACHE_DISABLE(slot) \
static ssize_t \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return show_cache_disable(this_leaf, buf, slot); \
}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count, \
+ unsigned int cpu) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
+static ssize_t
+show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+{
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t
+store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
+ unsigned int cpu)
+{
+ unsigned long val;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ if (strict_strtoul(buf, 16, &val) < 0)
+ return -EINVAL;
+
+ if (amd_set_subcaches(cpu, val))
+ return -EINVAL;
+
+ return count;
+}
+
+static struct _cache_attr subcaches =
+ __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+
#else /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
__cpuinit cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
+static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
return n;
}
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 0, buf);
}
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
switch (this_leaf->eax.split.type) {
case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ n += 1;
+
attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
if (attrs == NULL)
return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
attrs[n++] = &cache_disable_1.attr;
}
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ attrs[n++] = &subcaches.attr;
+
return attrs;
}
#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
+ buf, this_leaf->cpu) :
0;
return ret;
}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
ret = fattr->store ?
fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ buf, count, this_leaf->cpu) :
0;
return ret;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52ac..167f97b5596 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
new file mode 100644
index 00000000000..7a8cebc9ff2
--- /dev/null
+++ b/arch/x86/kernel/devicetree.c
@@ -0,0 +1,441 @@
+/*
+ * Architecture specific OF callbacks.
+ */
+#include <linux/bootmem.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/of_pci.h>
+
+#include <asm/hpet.h>
+#include <asm/irq_controller.h>
+#include <asm/apic.h>
+#include <asm/pci_x86.h>
+
+__initdata u64 initial_dtb;
+char __initdata cmd_line[COMMAND_LINE_SIZE];
+static LIST_HEAD(irq_domains);
+static DEFINE_RAW_SPINLOCK(big_irq_lock);
+
+int __initdata of_ioapic;
+
+#ifdef CONFIG_X86_IO_APIC
+static void add_interrupt_host(struct irq_domain *ih)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_add(&ih->l, &irq_domains);
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+}
+#endif
+
+static struct irq_domain *get_ih_from_node(struct device_node *controller)
+{
+ struct irq_domain *ih, *found = NULL;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_for_each_entry(ih, &irq_domains, l) {
+ if (ih->controller == controller) {
+ found = ih;
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+ return found;
+}
+
+unsigned int irq_create_of_mapping(struct device_node *controller,
+ const u32 *intspec, unsigned int intsize)
+{
+ struct irq_domain *ih;
+ u32 virq, type;
+ int ret;
+
+ ih = get_ih_from_node(controller);
+ if (!ih)
+ return 0;
+ ret = ih->xlate(ih, intspec, intsize, &virq, &type);
+ if (ret)
+ return ret;
+ if (type == IRQ_TYPE_NONE)
+ return virq;
+ /* set the mask if it is different from current */
+ if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
+ set_irq_type(virq, type);
+ return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ /*
+ * The ioport address can be directly used by inX / outX
+ */
+ BUG_ON(address >= (1 << 16));
+ return (unsigned long)address;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
+{
+ BUG();
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ BUG();
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
+}
+
+void __init add_dtb(u64 data)
+{
+ initial_dtb = data + offsetof(struct setup_data, data);
+}
+
+/*
+ * CE4100 ids. Will be moved to machine_device_initcall() once we have it.
+ */
+static struct of_device_id __initdata ce4100_ids[] = {
+ { .compatible = "intel,ce4100-cp", },
+ { .compatible = "isa", },
+ { .compatible = "pci", },
+ {},
+};
+
+static int __init add_bus_probe(void)
+{
+ if (!of_have_populated_dt())
+ return 0;
+
+ return of_platform_bus_probe(NULL, ce4100_ids, NULL);
+}
+module_init(add_bus_probe);
+
+#ifdef CONFIG_PCI
+static int x86_of_pci_irq_enable(struct pci_dev *dev)
+{
+ struct of_irq oirq;
+ u32 virq;
+ int ret;
+ u8 pin;
+
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (ret)
+ return ret;
+ if (!pin)
+ return 0;
+
+ ret = of_irq_map_pci(dev, &oirq);
+ if (ret)
+ return ret;
+
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ if (virq == 0)
+ return -EINVAL;
+ dev->irq = virq;
+ return 0;
+}
+
+static void x86_of_pci_irq_disable(struct pci_dev *dev)
+{
+}
+
+void __cpuinit x86_of_pci_init(void)
+{
+ struct device_node *np;
+
+ pcibios_enable_irq = x86_of_pci_irq_enable;
+ pcibios_disable_irq = x86_of_pci_irq_disable;
+
+ for_each_node_by_type(np, "pci") {
+ const void *prop;
+ struct pci_bus *bus;
+ unsigned int bus_min;
+ struct device_node *child;
+
+ prop = of_get_property(np, "bus-range", NULL);
+ if (!prop)
+ continue;
+ bus_min = be32_to_cpup(prop);
+
+ bus = pci_find_bus(0, bus_min);
+ if (!bus) {
+ printk(KERN_ERR "Can't find a node for bus %s.\n",
+ np->full_name);
+ continue;
+ }
+
+ if (bus->self)
+ bus->self->dev.of_node = np;
+ else
+ bus->dev.of_node = np;
+
+ for_each_child_of_node(np, child) {
+ struct pci_dev *dev;
+ u32 devfn;
+
+ prop = of_get_property(child, "reg", NULL);
+ if (!prop)
+ continue;
+
+ devfn = (be32_to_cpup(prop) >> 8) & 0xff;
+ dev = pci_get_slot(bus, devfn);
+ if (!dev)
+ continue;
+ dev->dev.of_node = child;
+ pci_dev_put(dev);
+ }
+ }
+}
+#endif
+
+static void __init dtb_setup_hpet(void)
+{
+#ifdef CONFIG_HPET_TIMER
+ struct device_node *dn;
+ struct resource r;
+ int ret;
+
+ dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet");
+ if (!dn)
+ return;
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ WARN_ON(1);
+ return;
+ }
+ hpet_address = r.start;
+#endif
+}
+
+static void __init dtb_lapic_setup(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ struct device_node *dn;
+ struct resource r;
+ int ret;
+
+ dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic");
+ if (!dn)
+ return;
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (WARN_ON(ret))
+ return;
+
+ /* Did the boot loader setup the local APIC ? */
+ if (!cpu_has_apic) {
+ if (apic_force_enable(r.start))
+ return;
+ }
+ smp_found_config = 1;
+ pic_mode = 1;
+ register_lapic_address(r.start);
+ generic_processor_info(boot_cpu_physical_apicid,
+ GET_APIC_VERSION(apic_read(APIC_LVR)));
+#endif
+}
+
+#ifdef CONFIG_X86_IO_APIC
+static unsigned int ioapic_id;
+
+static void __init dtb_add_ioapic(struct device_node *dn)
+{
+ struct resource r;
+ int ret;
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Can't obtain address from node %s.\n",
+ dn->full_name);
+ return;
+ }
+ mp_register_ioapic(++ioapic_id, r.start, gsi_top);
+}
+
+static void __init dtb_ioapic_setup(void)
+{
+ struct device_node *dn;
+
+ for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
+ dtb_add_ioapic(dn);
+
+ if (nr_ioapics) {
+ of_ioapic = 1;
+ return;
+ }
+ printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+}
+#else
+static void __init dtb_ioapic_setup(void) {}
+#endif
+
+static void __init dtb_apic_setup(void)
+{
+ dtb_lapic_setup();
+ dtb_ioapic_setup();
+}
+
+#ifdef CONFIG_OF_FLATTREE
+static void __init x86_flattree_get_config(void)
+{
+ u32 size, map_len;
+ void *new_dtb;
+
+ if (!initial_dtb)
+ return;
+
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
+ (u64)sizeof(struct boot_param_header));
+
+ initial_boot_params = early_memremap(initial_dtb, map_len);
+ size = be32_to_cpu(initial_boot_params->totalsize);
+ if (map_len < size) {
+ early_iounmap(initial_boot_params, map_len);
+ initial_boot_params = early_memremap(initial_dtb, size);
+ map_len = size;
+ }
+
+ new_dtb = alloc_bootmem(size);
+ memcpy(new_dtb, initial_boot_params, size);
+ early_iounmap(initial_boot_params, map_len);
+
+ initial_boot_params = new_dtb;
+
+ /* root level address cells */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+
+ unflatten_device_tree();
+}
+#else
+static inline void x86_flattree_get_config(void) { }
+#endif
+
+void __init x86_dtb_init(void)
+{
+ x86_flattree_get_config();
+
+ if (!of_have_populated_dt())
+ return;
+
+ dtb_setup_hpet();
+ dtb_apic_setup();
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+struct of_ioapic_type {
+ u32 out_type;
+ u32 trigger;
+ u32 polarity;
+};
+
+static struct of_ioapic_type of_ioapic_type[] =
+{
+ {
+ .out_type = IRQ_TYPE_EDGE_RISING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_LOW,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 0,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_HIGH,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_EDGE_FALLING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 0,
+ },
+};
+
+static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type)
+{
+ struct io_apic_irq_attr attr;
+ struct of_ioapic_type *it;
+ u32 line, idx, type;
+
+ if (intsize < 2)
+ return -EINVAL;
+
+ line = *intspec;
+ idx = (u32) id->priv;
+ *out_hwirq = line + mp_gsi_routing[idx].gsi_base;
+
+ intspec++;
+ type = *intspec;
+
+ if (type >= ARRAY_SIZE(of_ioapic_type))
+ return -EINVAL;
+
+ it = of_ioapic_type + type;
+ *out_type = it->out_type;
+
+ set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
+
+ return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+}
+
+static void __init ioapic_add_ofnode(struct device_node *np)
+{
+ struct resource r;
+ int i, ret;
+
+ ret = of_address_to_resource(np, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Failed to obtain address for %s\n",
+ np->full_name);
+ return;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (r.start == mp_ioapics[i].apicaddr) {
+ struct irq_domain *id;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ BUG_ON(!id);
+ id->controller = np;
+ id->xlate = ioapic_xlate;
+ id->priv = (void *)i;
+ add_interrupt_host(id);
+ return;
+ }
+ }
+ printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
+}
+
+void __init x86_add_irq_domains(void)
+{
+ struct device_node *dp;
+
+ if (!of_have_populated_dt())
+ return;
+
+ for_each_node_with_property(dp, "interrupt-controller") {
+ if (of_device_is_compatible(dp, "intel,ce4100-ioapic"))
+ ioapic_add_ofnode(dp);
+ }
+}
+#else
+void __init x86_add_irq_domains(void) { }
+#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 294f26da0c0..cdf5bfd9d4d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -667,21 +667,15 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
{
- u32 map_len;
int entries;
struct e820entry *extmap;
entries = sdata->len / sizeof(struct e820entry);
- map_len = sdata->len + sizeof(struct setup_data);
- if (map_len > PAGE_SIZE)
- sdata = early_ioremap(pa_data, map_len);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- if (map_len > PAGE_SIZE)
- early_iounmap(sdata, map_len);
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("extended");
}
@@ -847,15 +841,21 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
- }
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
#endif
+ }
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f5accf8eaa7..fa41f7298c8 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -397,7 +397,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@@ -1411,8 +1411,7 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
- pushl $do_async_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC
END(apf_page_fault)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0a0ed794edb..b72b4a6466a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -977,9 +977,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
.endr
#endif
@@ -1250,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
-END(do_hypervisor_callback)
+END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 767d6c43de3..ce0be7cd085 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
*/
KERNEL_PAGES = LOWMEM_PAGES
-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
RESERVE_BRK(pagetables, INIT_MAP_SIZE)
/*
@@ -137,7 +137,7 @@ ENTRY(startup_32)
movsl
1:
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
/* save OFW's pgdir table for later use when calling into OFW */
movl %cr3, %eax
movl %eax, pa(olpc_ofw_pgd)
@@ -623,7 +623,7 @@ ENTRY(initial_code)
* BSS section
*/
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
#ifdef CONFIG_X86_PAE
initial_pg_pmd:
.fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
#ifdef CONFIG_X86_PAE
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
- .align PAGE_SIZE_asm /* needs to be page-sized too */
+ .align PAGE_SIZE /* needs to be page-sized too */
#endif
.data
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4ff5968f12d..bfe8f729e08 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
if (!irq)
return -EINVAL;
- set_irq_data(irq, dev);
+ irq_set_handler_data(irq, dev);
if (hpet_setup_msi_irq(irq))
return -EINVAL;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 20757cb2efa..d9ca749c123 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq)
{
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
- set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
+ irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
i8259A_chip.name);
enable_irq(irq);
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af..8c968974253 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <linux/bitmap.h>
#include <asm/syscalls.h>
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
- unsigned int extent, int new_value)
-{
- unsigned int i;
-
- for (i = base; i < base + extent; i++) {
- if (new_value)
- __set_bit(i, bitmap);
- else
- __clear_bit(i, bitmap);
- }
-}
-
/*
* this changes the io permissions bitmap in the current task.
*/
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
*/
tss = &per_cpu(init_tss, get_cpu());
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+ if (turn_on)
+ bitmap_clear(t->io_bitmap_ptr, from, num);
+ else
+ bitmap_set(t->io_bitmap_ptr, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 387b6a0c9e8..948a31eae75 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq)
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
- * /proc/interrupts printing:
+ * /proc/interrupts printing for arch specific interrupts
*/
-static int show_other_interrupts(struct seq_file *p, int prec)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
@@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec)
return 0;
}
-int show_interrupts(struct seq_file *p, void *v)
-{
- unsigned long flags, any_count = 0;
- int i = *(loff_t *) v, j, prec;
- struct irqaction *action;
- struct irq_desc *desc;
-
- if (i > nr_irqs)
- return 0;
-
- for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
- j *= 10;
-
- if (i == nr_irqs)
- return show_other_interrupts(p, prec);
-
- /* print header */
- if (i == 0) {
- seq_printf(p, "%*s", prec + 8, "");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d", j);
- seq_putc(p, '\n');
- }
-
- desc = irq_to_desc(i);
- if (!desc)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- for_each_online_cpu(j)
- any_count |= kstat_irqs_cpu(i, j);
- action = desc->action;
- if (!action && !any_count)
- goto out;
-
- seq_printf(p, "%*d: ", prec, i);
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->irq_data.chip->name);
- seq_printf(p, "-%-8s", desc->name);
-
- if (action) {
- seq_printf(p, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(p, ", %s", action->name);
- }
-
- seq_putc(p, '\n');
-out:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
-}
-
/*
* /proc/stat helpers
*/
@@ -276,15 +223,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
-#ifdef CONFIG_OF
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- return intspec[0];
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
@@ -293,6 +231,7 @@ void fixup_irqs(void)
static int warned;
struct irq_desc *desc;
struct irq_data *data;
+ struct irq_chip *chip;
for_each_irq_desc(irq, desc) {
int break_affinity = 0;
@@ -307,10 +246,10 @@ void fixup_irqs(void)
/* interrupt's are disabled at this point */
raw_spin_lock(&desc->lock);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
affinity = data->affinity;
if (!irq_has_action(irq) ||
- cpumask_equal(affinity, cpu_online_mask)) {
+ cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
continue;
}
@@ -327,16 +266,17 @@ void fixup_irqs(void)
affinity = cpu_all_mask;
}
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
- data->chip->irq_mask(data);
+ chip = irq_data_get_irq_chip(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
+ chip->irq_mask(data);
- if (data->chip->irq_set_affinity)
- data->chip->irq_set_affinity(data, affinity, true);
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, affinity, true);
else if (!(warned++))
set_affinity = 0;
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
- data->chip->irq_unmask(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+ chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
@@ -368,10 +308,11 @@ void fixup_irqs(void)
irq = __this_cpu_read(vector_irq[vector]);
desc = irq_to_desc(irq);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
+ chip = irq_data_get_irq_chip(data);
raw_spin_lock(&desc->lock);
- if (data->chip->irq_retrigger)
- data->chip->irq_retrigger(data);
+ if (chip->irq_retrigger)
+ chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e973958..f470e4ef993 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -25,6 +25,7 @@
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
+#include <asm/prom.h>
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -71,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
static struct irqaction fpu_irq = {
.handler = math_error_irq,
.name = "fpu",
+ .flags = IRQF_NO_THREAD,
};
#endif
@@ -80,6 +82,7 @@ static struct irqaction fpu_irq = {
static struct irqaction irq2 = {
.handler = no_action,
.name = "cascade",
+ .flags = IRQF_NO_THREAD,
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
@@ -110,7 +113,7 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
- set_irq_chip_and_handler_name(i, chip, handle_level_irq, name);
+ irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
}
void __init init_IRQ(void)
@@ -118,6 +121,12 @@ void __init init_IRQ(void)
int i;
/*
+ * We probably need a better place for this, but it works for
+ * now ...
+ */
+ x86_add_irq_domains();
+
+ /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
@@ -164,14 +173,77 @@ static void __init smp_intr_init(void)
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPIs for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+#define ALLOC_INVTLB_VEC(NR) \
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
+ invalidate_interrupt##NR)
+
+ switch (NUM_INVALIDATE_TLB_VECTORS) {
+ default:
+ ALLOC_INVTLB_VEC(31);
+ case 31:
+ ALLOC_INVTLB_VEC(30);
+ case 30:
+ ALLOC_INVTLB_VEC(29);
+ case 29:
+ ALLOC_INVTLB_VEC(28);
+ case 28:
+ ALLOC_INVTLB_VEC(27);
+ case 27:
+ ALLOC_INVTLB_VEC(26);
+ case 26:
+ ALLOC_INVTLB_VEC(25);
+ case 25:
+ ALLOC_INVTLB_VEC(24);
+ case 24:
+ ALLOC_INVTLB_VEC(23);
+ case 23:
+ ALLOC_INVTLB_VEC(22);
+ case 22:
+ ALLOC_INVTLB_VEC(21);
+ case 21:
+ ALLOC_INVTLB_VEC(20);
+ case 20:
+ ALLOC_INVTLB_VEC(19);
+ case 19:
+ ALLOC_INVTLB_VEC(18);
+ case 18:
+ ALLOC_INVTLB_VEC(17);
+ case 17:
+ ALLOC_INVTLB_VEC(16);
+ case 16:
+ ALLOC_INVTLB_VEC(15);
+ case 15:
+ ALLOC_INVTLB_VEC(14);
+ case 14:
+ ALLOC_INVTLB_VEC(13);
+ case 13:
+ ALLOC_INVTLB_VEC(12);
+ case 12:
+ ALLOC_INVTLB_VEC(11);
+ case 11:
+ ALLOC_INVTLB_VEC(10);
+ case 10:
+ ALLOC_INVTLB_VEC(9);
+ case 9:
+ ALLOC_INVTLB_VEC(8);
+ case 8:
+ ALLOC_INVTLB_VEC(7);
+ case 7:
+ ALLOC_INVTLB_VEC(6);
+ case 6:
+ ALLOC_INVTLB_VEC(5);
+ case 5:
+ ALLOC_INVTLB_VEC(4);
+ case 4:
+ ALLOC_INVTLB_VEC(3);
+ case 3:
+ ALLOC_INVTLB_VEC(2);
+ case 2:
+ ALLOC_INVTLB_VEC(1);
+ case 1:
+ ALLOC_INVTLB_VEC(0);
+ break;
+ }
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -243,7 +315,7 @@ void __init native_init_IRQ(void)
set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
- if (!acpi_ioapic)
+ if (!acpi_ioapic && !of_ioapic)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c3..c5610384ab1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE 2048
#define UCODE_CONTAINER_SECTION_HDR 8
#define UCODE_CONTAINER_HEADER_SIZE 12
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
return -1;
}
+
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
+
return 0;
}
-static int get_matching_microcode(int cpu, void *mc, int rev)
+static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
+ int rev)
{
- struct microcode_header_amd *mc_header = mc;
unsigned int current_cpu_id;
u16 equiv_cpu_id = 0;
unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
if (!equiv_cpu_id)
return 0;
- if (mc_header->processor_rev_id != equiv_cpu_id)
+ if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
/* ucode might be chipset specific -- currently we don't support this */
- if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}
- if (mc_header->patch_id <= rev)
+ if (mc_hdr->patch_id <= rev)
return 0;
return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
return -1;
}
- pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
}
-static void *
-get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
{
- unsigned int total_size;
- u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
- void *mc;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
- get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
+ actual_size = buf[4] + (buf[5] << 8);
- if (section_hdr[0] != UCODE_UCODE_TYPE) {
- pr_err("error: invalid type field in container file section header\n");
- return NULL;
+ if (actual_size > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
}
- total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
+ return actual_size;
+}
- if (total_size > size || total_size > UCODE_MAX_SIZE) {
- pr_err("error: size mismatch\n");
- return NULL;
+static struct microcode_header_amd *
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
+{
+ struct microcode_header_amd *mc = NULL;
+ unsigned int actual_size = 0;
+
+ if (buf[0] != UCODE_UCODE_TYPE) {
+ pr_err("invalid type field in container file section header\n");
+ goto out;
}
- mc = vzalloc(UCODE_MAX_SIZE);
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
+ goto out;
+
+ mc = vzalloc(actual_size);
if (!mc)
- return NULL;
+ goto out;
- get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
+ *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
+out:
return mc;
}
static int install_equiv_cpu_table(const u8 *buf)
{
- u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
- unsigned int *buf_pos = (unsigned int *)container_hdr;
- unsigned long size;
-
- get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
-
- size = buf_pos[2];
-
- if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- pr_err("error: invalid type field in container file section header\n");
- return 0;
+ unsigned int *ibuf = (unsigned int *)buf;
+ unsigned int type = ibuf[1];
+ unsigned int size = ibuf[2];
+
+ if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+ pr_err("empty section/"
+ "invalid type field in container file section header\n");
+ return -EINVAL;
}
equiv_cpu_table = vmalloc(size);
if (!equiv_cpu_table) {
pr_err("failed to allocate equivalent CPU table\n");
- return 0;
+ return -ENOMEM;
}
- buf += UCODE_CONTAINER_HEADER_SIZE;
- get_ucode_data(equiv_cpu_table, buf, size);
+ get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
}
@@ -223,16 +244,16 @@ static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct microcode_header_amd *mc_hdr = NULL;
+ unsigned int mc_size, leftover;
+ int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
- void *mc;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover;
- unsigned long offset;
+ unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_OK;
offset = install_equiv_cpu_table(ucode_ptr);
- if (!offset) {
+ if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
leftover = size - offset;
while (leftover) {
- unsigned int uninitialized_var(mc_size);
- struct microcode_header_amd *mc_header;
-
- mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
- if (!mc)
+ mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
+ if (!mc_hdr)
break;
- mc_header = (struct microcode_header_amd *)mc;
- if (get_matching_microcode(cpu, mc, new_rev)) {
+ if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
vfree(new_mc);
- new_rev = mc_header->patch_id;
- new_mc = mc;
+ new_rev = mc_hdr->patch_id;
+ new_mc = mc_hdr;
} else
- vfree(mc);
+ vfree(mc_hdr);
ucode_ptr += mc_size;
leftover -= mc_size;
}
- if (new_mc) {
- if (!leftover) {
- vfree(uci->mc);
- uci->mc = new_mc;
- pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
- } else {
- vfree(new_mc);
- state = UCODE_ERROR;
- }
- } else
+ if (!new_mc) {
state = UCODE_NFOUND;
+ goto free_table;
+ }
+ if (!leftover) {
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+ cpu, uci->cpu_sig.rev, new_rev);
+ } else {
+ vfree(new_mc);
+ state = UCODE_ERROR;
+ }
+
+free_table:
free_equiv_cpu_table();
return state;
}
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
- enum ucode_state ret;
+ const struct firmware *fw;
+ enum ucode_state ret = UCODE_NFOUND;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
- return UCODE_NFOUND;
+ if (request_firmware(&fw, fw_name, device)) {
+ pr_err("failed to load file %s\n", fw_name);
+ goto out;
}
- if (*(u32 *)firmware->data != UCODE_MAGIC) {
- pr_err("invalid UCODE_MAGIC (0x%08x)\n",
- *(u32 *)firmware->data);
- return UCODE_ERROR;
+ ret = UCODE_ERROR;
+ if (*(u32 *)fw->data != UCODE_MAGIC) {
+ pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+ goto fw_release;
}
- ret = generic_load_microcode(cpu, firmware->data, firmware->size);
+ ret = generic_load_microcode(cpu, fw->data, fw->size);
- release_firmware(firmware);
+fw_release:
+ release_firmware(fw);
+out:
return ret;
}
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
- .request_microcode_fw = request_microcode_fw,
+ .request_microcode_fw = request_microcode_amd,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2ba..87af68e0e1e 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR)
- err = -EINVAL;
+ if (microcode_init_cpu(cpu) == UCODE_ERROR) {
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ return -EINVAL;
+ }
return err;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..99fa3adf014 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_CONT " ");
- printk(KERN_CONT "%s %s", vendor, product);
- if (board) {
- printk(KERN_CONT "/");
- printk(KERN_CONT "%s", board);
- }
+ printk(KERN_CONT " %s %s", vendor, product);
+ if (board)
+ printk(KERN_CONT "/%s", board);
printk(KERN_CONT "\n");
}
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 6f39cab052d..3f2ad2640d8 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/bcd.h>
#include <linux/pnp.h>
+#include <linux/of.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void)
}
}
#endif
+ if (of_have_populated_dt())
+ return 0;
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3cfe26c025..b176f2b1f45 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,7 @@
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
+#include <asm/prom.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -293,10 +294,32 @@ static void __init init_gbpages(void)
else
direct_gbpages = 0;
}
+
+static void __init cleanup_highmap_brk_end(void)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ mmu_cr4_features = read_cr4();
+
+ /*
+ * _brk_end cannot change anymore, but it and _end may be
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+ pmd_clear(pmd);
+}
#else
static inline void init_gbpages(void)
{
}
+static inline void cleanup_highmap_brk_end(void)
+{
+}
#endif
static void __init reserve_brk(void)
@@ -307,6 +330,8 @@ static void __init reserve_brk(void)
/* Mark brk area as locked down and no longer taking any
new allocations */
_brk_start = 0;
+
+ cleanup_highmap_brk_end();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -429,16 +454,30 @@ static void __init parse_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_memremap(pa_data, PAGE_SIZE);
+ u32 data_len, map_len;
+
+ map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
+ (u64)sizeof(struct setup_data));
+ data = early_memremap(pa_data, map_len);
+ data_len = data->len + sizeof(struct setup_data);
+ if (data_len > map_len) {
+ early_iounmap(data, map_len);
+ data = early_memremap(pa_data, data_len);
+ map_len = data_len;
+ }
+
switch (data->type) {
case SETUP_E820_EXT:
- parse_e820_ext(data, pa_data);
+ parse_e820_ext(data);
+ break;
+ case SETUP_DTB:
+ add_dtb(pa_data);
break;
default:
break;
}
pa_data = data->next;
- early_iounmap(data, PAGE_SIZE);
+ early_iounmap(data, map_len);
}
}
@@ -680,15 +719,6 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
-static u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -704,8 +734,6 @@ static u64 __init get_max_mapped(void)
void __init setup_arch(char **cmdline_p)
{
- int acpi = 0;
- int amd = 0;
unsigned long flags;
#ifdef CONFIG_X86_32
@@ -984,19 +1012,7 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi = acpi_numa_init();
-#endif
-
-#ifdef CONFIG_AMD_NUMA
- if (!acpi)
- amd = !amd_numa_init(0, max_pfn);
-#endif
-
- initmem_init(0, max_pfn, acpi, amd);
+ initmem_init();
memblock_find_dma_reserve();
dma32_reserve_bootmem();
@@ -1029,8 +1045,8 @@ void __init setup_arch(char **cmdline_p)
* Read APIC and some other early information from ACPI tables.
*/
acpi_boot_init();
-
sfi_init();
+ x86_dtb_init();
/*
* get boot-time SMP configuration:
@@ -1040,9 +1056,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
-#ifdef CONFIG_X86_64
init_cpu_to_node();
-#endif
init_apic_mappings();
ioapic_and_gsi_init();
@@ -1066,6 +1080,8 @@ void __init setup_arch(char **cmdline_p)
#endif
x86_init.oem.banner();
+ x86_init.timers.wallclock_init();
+
mcheck_init();
local_irq_save(flags);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f7..71f4727da37 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
+#ifdef CONFIG_X86_32
+ per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
+#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
*/
set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
-#endif
/*
* Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_X86_32
+ early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
+#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 08776a95348..e9efdfd51c8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -71,10 +72,6 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
-#endif
-
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
- printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node++)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = 0;
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-#endif
-
-#ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
- int node = apic->apicid_to_node(apicid);
-
- if (!node_online(node))
- node = first_online_node;
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid() do {} while (0)
-#endif
-
/*
* Report back to the Boot Processor.
* Running on AP.
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
- map_cpu_to_logical_apicid();
/*
* Need to setup vector mappings before we enable interrupts.
@@ -355,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -381,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -389,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -414,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -945,6 +868,14 @@ int __cpuinit native_cpu_up(unsigned int cpu)
return 0;
}
+/**
+ * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ */
+void arch_disable_smp_support(void)
+{
+ disable_ioapic_support();
+}
+
/*
* Fall back to non SMP mode after errors.
*
@@ -960,7 +891,6 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- map_cpu_to_logical_apicid();
cpumask_set_cpu(0, cpu_sibling_mask(0));
cpumask_set_cpu(0, cpu_core_mask(0));
}
@@ -1045,7 +975,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
"(tell your hw vendor)\n");
}
smpboot_clear_io_apic();
- arch_disable_smp_support();
+ disable_ioapic_support();
return -1;
}
@@ -1089,21 +1019,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
- boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
+
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -1139,8 +1067,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
bsp_end_local_APIC_setup();
- map_cpu_to_logical_apicid();
-
if (apic->setup_portio_remap)
apic->setup_portio_remap();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8..5f181742e8f 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,6 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_name_to_handle_at
+ .long sys_open_by_handle_at
+ .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 6d4341d5c52..0381e1f3bae 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -306,7 +306,7 @@ SECTIONS
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e5..9796c2f3d07 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ceb2911aa43..c11514e9128 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
+ .wallclock_init = x86_init_noop,
},
.iommu = {
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0..b9ec1c74943 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void)
void lguest_setup_irq(unsigned int irq)
{
irq_alloc_desc_at(irq, 0);
- set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
+ irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
}
@@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
static void lguest_time_init(void)
{
/* Set up the timer interrupt (0) to go to our simple timer routine */
- set_irq_handler(0, lguest_time_irq);
+ irq_set_handler(0, lguest_time_irq);
clocksource_register(&lguest_clock);
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e6..e8e7e0d06f4 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
/* if you want SMP support, implement these with real spinlocks */
.macro LOCK reg
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
+ pushfl_cfi
cli
.endm
.macro UNLOCK reg
- popfl
- CFI_ADJUST_CFA_OFFSET -4
+ popfl_cfi
.endm
#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de335..391a083674b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
#include <asm/dwarf2.h>
.macro SAVE reg
- pushl %\reg
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
- popl %\reg
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %\reg
CFI_RESTORE \reg
.endm
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb7..78d16a554db 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
jz 8f
roll $8, %eax
8:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
jz 90f
roll $8, %eax
90:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
.previous
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ecx # equivalent to addl $4,%esp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
ret
CFI_ENDPROC
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 00000000000..0ecb8433e5a
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
+/*
+ * Normally compiler builtins are used, but sometimes the compiler calls out
+ * of line code. Based on asm-i386/string.h.
+ *
+ * This assembly file is re-written from memmove_64.c file.
+ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
+ */
+#define _STRING_C
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+#undef memmove
+
+/*
+ * Implement memmove(). This can handle overlap between src and dst.
+ *
+ * Input:
+ * rdi: dest
+ * rsi: src
+ * rdx: count
+ *
+ * Output:
+ * rax: dest
+ */
+ENTRY(memmove)
+ CFI_STARTPROC
+ /* Handle more 32bytes in loop */
+ mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f
+
+ /* Decide forward/backward copy mode */
+ cmp %rdi, %rsi
+ jb 2f
+
+ /*
+ * movsq instruction have many startup latency
+ * so we handle small size by general register.
+ */
+ cmp $680, %rdx
+ jb 3f
+ /*
+ * movsq instruction is only good for aligned case.
+ */
+
+ cmpb %dil, %sil
+ je 4f
+3:
+ sub $0x20, %rdx
+ /*
+ * We gobble 32byts forward in each loop.
+ */
+5:
+ sub $0x20, %rdx
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r8
+ leaq 4*8(%rsi), %rsi
+
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, 2*8(%rdi)
+ movq %r8, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae 5b
+ addq $0x20, %rdx
+ jmp 1f
+ /*
+ * Handle data forward by movsq.
+ */
+ .p2align 4
+4:
+ movq %rdx, %rcx
+ movq -8(%rsi, %rdx), %r11
+ lea -8(%rdi, %rdx), %r10
+ shrq $3, %rcx
+ rep movsq
+ movq %r11, (%r10)
+ jmp 13f
+ /*
+ * Handle data backward by movsq.
+ */
+ .p2align 4
+7:
+ movq %rdx, %rcx
+ movq (%rsi), %r11
+ movq %rdi, %r10
+ leaq -8(%rsi, %rdx), %rsi
+ leaq -8(%rdi, %rdx), %rdi
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ movq %r11, (%r10)
+ jmp 13f
+
+ /*
+ * Start to prepare for backward copy.
+ */
+ .p2align 4
+2:
+ cmp $680, %rdx
+ jb 6f
+ cmp %dil, %sil
+ je 7b
+6:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * We gobble 32byts backward in each loop.
+ */
+8:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r11
+ movq -2*8(%rsi), %r10
+ movq -3*8(%rsi), %r9
+ movq -4*8(%rsi), %r8
+ leaq -4*8(%rsi), %rsi
+
+ movq %r11, -1*8(%rdi)
+ movq %r10, -2*8(%rdi)
+ movq %r9, -3*8(%rdi)
+ movq %r8, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae 8b
+ /*
+ * Calculate copy position to head.
+ */
+ addq $0x20, %rdx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+1:
+ cmpq $16, %rdx
+ jb 9f
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq -2*8(%rsi, %rdx), %r9
+ movq -1*8(%rsi, %rdx), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, -2*8(%rdi, %rdx)
+ movq %r8, -1*8(%rdi, %rdx)
+ jmp 13f
+ .p2align 4
+9:
+ cmpq $8, %rdx
+ jb 10f
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq -1*8(%rsi, %rdx), %r10
+ movq %r11, 0*8(%rdi)
+ movq %r10, -1*8(%rdi, %rdx)
+ jmp 13f
+10:
+ cmpq $4, %rdx
+ jb 11f
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %r11d
+ movl -4(%rsi, %rdx), %r10d
+ movl %r11d, (%rdi)
+ movl %r10d, -4(%rdi, %rdx)
+ jmp 13f
+11:
+ cmp $2, %rdx
+ jb 12f
+ /*
+ * Move data from 2 bytes to 3 bytes.
+ */
+ movw (%rsi), %r11w
+ movw -2(%rsi, %rdx), %r10w
+ movw %r11w, (%rdi)
+ movw %r10w, -2(%rdi, %rdx)
+ jmp 13f
+12:
+ cmp $1, %rdx
+ jb 13f
+ /*
+ * Move data for 1 byte.
+ */
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+13:
+ retq
+ CFI_ENDPROC
+ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b3..00000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void *dest, const void *src, size_t count)
-{
- unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
- char *ret;
-
- __asm__ __volatile__(
- /* Handle more 32bytes in loop */
- "mov %2, %3\n\t"
- "cmp $0x20, %0\n\t"
- "jb 1f\n\t"
-
- /* Decide forward/backward copy mode */
- "cmp %2, %1\n\t"
- "jb 2f\n\t"
-
- /*
- * movsq instruction have many startup latency
- * so we handle small size by general register.
- */
- "cmp $680, %0\n\t"
- "jb 3f\n\t"
- /*
- * movsq instruction is only good for aligned case.
- */
- "cmpb %%dil, %%sil\n\t"
- "je 4f\n\t"
- "3:\n\t"
- "sub $0x20, %0\n\t"
- /*
- * We gobble 32byts forward in each loop.
- */
- "5:\n\t"
- "sub $0x20, %0\n\t"
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq 2*8(%1), %6\n\t"
- "movq 3*8(%1), %7\n\t"
- "leaq 4*8(%1), %1\n\t"
-
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, 2*8(%2)\n\t"
- "movq %7, 3*8(%2)\n\t"
- "leaq 4*8(%2), %2\n\t"
- "jae 5b\n\t"
- "addq $0x20, %0\n\t"
- "jmp 1f\n\t"
- /*
- * Handle data forward by movsq.
- */
- ".p2align 4\n\t"
- "4:\n\t"
- "movq %0, %8\n\t"
- "movq -8(%1, %0), %4\n\t"
- "lea -8(%2, %0), %5\n\t"
- "shrq $3, %8\n\t"
- "rep movsq\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
- /*
- * Handle data backward by movsq.
- */
- ".p2align 4\n\t"
- "7:\n\t"
- "movq %0, %8\n\t"
- "movq (%1), %4\n\t"
- "movq %2, %5\n\t"
- "leaq -8(%1, %0), %1\n\t"
- "leaq -8(%2, %0), %2\n\t"
- "shrq $3, %8\n\t"
- "std\n\t"
- "rep movsq\n\t"
- "cld\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
-
- /*
- * Start to prepare for backward copy.
- */
- ".p2align 4\n\t"
- "2:\n\t"
- "cmp $680, %0\n\t"
- "jb 6f \n\t"
- "cmp %%dil, %%sil\n\t"
- "je 7b \n\t"
- "6:\n\t"
- /*
- * Calculate copy position to tail.
- */
- "addq %0, %1\n\t"
- "addq %0, %2\n\t"
- "subq $0x20, %0\n\t"
- /*
- * We gobble 32byts backward in each loop.
- */
- "8:\n\t"
- "subq $0x20, %0\n\t"
- "movq -1*8(%1), %4\n\t"
- "movq -2*8(%1), %5\n\t"
- "movq -3*8(%1), %6\n\t"
- "movq -4*8(%1), %7\n\t"
- "leaq -4*8(%1), %1\n\t"
-
- "movq %4, -1*8(%2)\n\t"
- "movq %5, -2*8(%2)\n\t"
- "movq %6, -3*8(%2)\n\t"
- "movq %7, -4*8(%2)\n\t"
- "leaq -4*8(%2), %2\n\t"
- "jae 8b\n\t"
- /*
- * Calculate copy position to head.
- */
- "addq $0x20, %0\n\t"
- "subq %0, %1\n\t"
- "subq %0, %2\n\t"
- "1:\n\t"
- "cmpq $16, %0\n\t"
- "jb 9f\n\t"
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq -2*8(%1, %0), %6\n\t"
- "movq -1*8(%1, %0), %7\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, -2*8(%2, %0)\n\t"
- "movq %7, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- ".p2align 4\n\t"
- "9:\n\t"
- "cmpq $8, %0\n\t"
- "jb 10f\n\t"
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq -1*8(%1, %0), %5\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- "10:\n\t"
- "cmpq $4, %0\n\t"
- "jb 11f\n\t"
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- "movl (%1), %4d\n\t"
- "movl -4(%1, %0), %5d\n\t"
- "movl %4d, (%2)\n\t"
- "movl %5d, -4(%2, %0)\n\t"
- "jmp 13f\n\t"
- "11:\n\t"
- "cmp $2, %0\n\t"
- "jb 12f\n\t"
- /*
- * Move data from 2 bytes to 3 bytes.
- */
- "movw (%1), %4w\n\t"
- "movw -2(%1, %0), %5w\n\t"
- "movw %4w, (%2)\n\t"
- "movw %5w, -2(%2, %0)\n\t"
- "jmp 13f\n\t"
- "12:\n\t"
- "cmp $1, %0\n\t"
- "jb 13f\n\t"
- /*
- * Move data for 1 byte.
- */
- "movb (%1), %4b\n\t"
- "movb %4b, (%2)\n\t"
- "13:\n\t"
- : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
- "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
- :"0" (count),
- "1" (src),
- "2" (dest)
- :"memory");
-
- return ret;
-
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49d..67743977398 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
#include <asm/dwarf2.h>
#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
+ pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
+ pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
+ pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
+ pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
+ pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
+ pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
+ pushq_cfi %r11; CFI_REL_OFFSET r11, 0
#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
+ popq_cfi %r11; CFI_RESTORE r11; \
+ popq_cfi %r10; CFI_RESTORE r10; \
+ popq_cfi %r9; CFI_RESTORE r9; \
+ popq_cfi %r8; CFI_RESTORE r8; \
+ popq_cfi %rcx; CFI_RESTORE rcx; \
+ popq_cfi %rsi; CFI_RESTORE rsi; \
+ popq_cfi %rdi; CFI_RESTORE rdi
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_down_read_failed
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_down_read_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
+ CFI_STARTPROC
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
ret
- ENDPROC(call_rwsem_down_write_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
+ CFI_STARTPROC
decl %edx /* do nothing if still outstanding active readers */
jnz 1f
save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
call rwsem_wake
restore_common_regs
1: ret
- ENDPROC(call_rwsem_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_downgrade_wake
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_downgrade_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe474178..06691daa410 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
*/
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
- CFI_STARTPROC simple
+ CFI_STARTPROC
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_down_read_failed
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
calll rwsem_down_write_failed
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
CFI_STARTPROC
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
call rwsem_wake
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
1: ret
CFI_ENDPROC
ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_downgrade_wake
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ec..2930ae05d77 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
#include <linux/linkage.h>
-#define ARCH_TRACE_IRQS_ON \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_on; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-#define ARCH_TRACE_IRQS_OFF \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_off; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in eax (arg1) */
.macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a542..782b082c9ff 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
CFI_ENDPROC
.endm
- /* rdi: arg1 ... normal C conventions. rax is passed from C. */
- .macro thunk_retrax name,func
- .globl \name
-\name:
- CFI_STARTPROC
- SAVE_ARGS
- call \func
- jmp restore_norax
- CFI_ENDPROC
- .endm
-
-
- .section .sched.text, "ax"
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
- thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
- thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
- thunk rwsem_wake_thunk,rwsem_wake
- thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
-#endif
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in rdi (arg1) */
.macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
RESTORE_ARGS
ret
CFI_ENDPROC
-
- CFI_STARTPROC
- SAVE_ARGS
-restore_norax:
- RESTORE_ARGS 1
- ret
- CFI_ENDPROC
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09df2f9a3d6..3e608edf995 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
+obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435e..0919c26820d 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -26,9 +26,7 @@
#include <asm/apic.h>
#include <asm/amd_nb.h>
-static struct bootnode __initdata nodes[8];
static unsigned char __initdata nodeids[8];
-static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void)
{
@@ -51,7 +49,7 @@ static __init int find_northbridge(void)
return num;
}
- return -1;
+ return -ENOENT;
}
static __init void early_get_boot_cpu_id(void)
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void)
#endif
}
-int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+int __init amd_numa_init(void)
{
- unsigned long start = PFN_PHYS(start_pfn);
- unsigned long end = PFN_PHYS(end_pfn);
+ unsigned long start = PFN_PHYS(0);
+ unsigned long end = PFN_PHYS(max_pfn);
unsigned numnodes;
unsigned long prevbase;
- int i, nb, found = 0;
+ int i, j, nb;
u32 nodeid, reg;
+ unsigned int bits, cores, apicid_base;
if (!early_pci_allowed())
- return -1;
+ return -EINVAL;
nb = find_northbridge();
if (nb < 0)
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
- return -1;
+ return -ENOENT;
pr_info("Number of physical nodes %d\n", numnodes);
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if ((base >> 8) & 3 || (limit >> 8) & 3) {
pr_err("Node %d using interleaving mode %lx/%lx\n",
nodeid, (base >> 8) & 3, (limit >> 8) & 3);
- return -1;
+ return -EINVAL;
}
- if (node_isset(nodeid, nodes_parsed)) {
+ if (node_isset(nodeid, numa_nodes_parsed)) {
pr_info("Node %d already present, skipping\n",
nodeid);
continue;
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if (prevbase > base) {
pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
- return -1;
+ return -EINVAL;
}
pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
- found++;
-
- nodes[nodeid].start = base;
- nodes[nodeid].end = limit;
-
prevbase = base;
-
- node_set(nodeid, nodes_parsed);
- }
-
- if (!found)
- return -1;
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
-void __init amd_get_nodes(struct bootnode *physnodes)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
+ numa_add_memblk(nodeid, base, limit);
+ node_set(nodeid, numa_nodes_parsed);
}
-}
-
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for (i = 0; i < 8; i++)
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- return ret;
-}
-/*
- * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
- * setup to represent the physical topology but reflect the emulated
- * environment. For each emulated node, the real node which it appears on is
- * found and a fake pxm to nid mapping is created which mirrors the actual
- * locality. node_distance() then represents the correct distances between
- * emulated nodes by using the fake acpi mappings to pxms.
- */
-void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base = 0;
- int i;
+ if (!nodes_weight(numa_nodes_parsed))
+ return -ENOENT;
+ /*
+ * We seem to have valid NUMA configuration. Map apicids to nodes
+ * using the coreid bits from early_identify_cpu.
+ */
bits = boot_cpu_data.x86_coreid_bits;
cores = 1 << bits;
- early_get_boot_cpu_id();
- if (boot_cpu_physical_apicid > 0)
- apicid_base = boot_cpu_physical_apicid;
-
- for (i = 0; i < nr_nodes; i++) {
- int index;
- int nid;
- int j;
-
- nid = find_node_by_addr(nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
-
- index = nodeids[nid] << bits;
- if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
- for (j = apicid_base; j < cores + apicid_base; j++)
- fake_apicid_to_node[index + j] = i;
-#ifdef CONFIG_ACPI_NUMA
- __acpi_map_pxm_to_node(nid, i);
-#endif
- }
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __init amd_scan_nodes(void)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base;
- int i;
-
- BUG_ON(nodes_empty(nodes_parsed));
- node_possible_map = nodes_parsed;
- memnode_shift = compute_hash_shift(nodes, 8, NULL);
- if (memnode_shift < 0) {
- pr_err("No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- pr_info("Using node hash shift of %d\n", memnode_shift);
-
- /* use the coreid bits from early_identify_cpu */
- bits = boot_cpu_data.x86_coreid_bits;
- cores = (1<<bits);
apicid_base = 0;
+
/* get the APIC ID of the BSP early for systems with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void)
apicid_base = boot_cpu_physical_apicid;
}
- for_each_node_mask(i, node_possible_map) {
- int j;
-
- memblock_x86_register_active_regions(i,
- nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
+ for_each_node_mask(i, numa_nodes_parsed)
for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(i << bits) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ set_apicid_to_node((i << bits) + j, i);
- numa_init_array();
return 0;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a..20e3f8702d1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
-
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
pmd_t *ret;
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
if (!ret)
break;
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return;
+ }
+
out_of_memory(regs, error_code, address);
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe82..286d289b039 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,9 +18,9 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long __initdata e820_table_start;
-unsigned long __meminitdata e820_table_end;
-unsigned long __meminitdata e820_table_top;
+unsigned long __initdata pgt_buf_start;
+unsigned long __meminitdata pgt_buf_end;
+unsigned long __meminitdata pgt_buf_top;
int after_bootmem;
@@ -33,7 +33,7 @@ int direct_gbpages
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
- unsigned long puds, pmds, ptes, tables, start;
+ unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
-#ifdef CONFIG_X86_32
- start = 0x7000;
-#else
- start = 0x8000;
+ good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
+
+ base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
- e820_table_start = base >> PAGE_SHIFT;
- e820_table_end = e820_table_start;
- e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+ pgt_buf_start = base >> PAGE_SHIFT;
+ pgt_buf_end = pgt_buf_start;
+ pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+ end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
struct map_range {
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
load_cr3(swapper_pg_dir);
#endif
-#ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
-#endif
__flush_tlb_all();
- if (!after_bootmem && e820_table_end > e820_table_start)
- memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
- e820_table_end << PAGE_SHIFT, "PGTABLE");
+ if (!after_bootmem && pgt_buf_end > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_end << PAGE_SHIFT, "PGTABLE");
if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c821074b7f0..73ad7ebd6e9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false;
static __init void *alloc_low_page(void)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
- || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
+ || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
pte_t *newpte;
int i;
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af8..a08a62cb136 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
for (address = start; address <= end; address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(pgt_lock);
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -314,7 +314,7 @@ void __init cleanup_highmap(void)
static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
if (after_bootmem) {
@@ -324,7 +324,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
+static __ref void *map_low_page(void *virt)
+{
+ void *adr;
+ unsigned long phys, left;
+
+ if (after_bootmem)
+ return virt;
+
+ phys = __pa(virt);
+ left = phys & (PAGE_SIZE - 1);
+ adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+ adr = (void *)(((unsigned long)adr) | left);
+
+ return adr;
+}
+
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
- early_iounmap(adr, PAGE_SIZE);
+ early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
- pgprot_t prot)
-{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
- return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pte_update(pmd, address,
+ pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ last_map_addr = phys_pte_init(pte, address,
end, prot);
+ unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask, pgprot_t prot)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long last_map_addr;
-
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
- __flush_tlb_all();
- return last_map_addr;
-}
-
-static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
- last_map_addr = phys_pmd_update(pud, addr, end,
+ pmd = map_low_page(pmd_offset(pud, 0));
+ last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
+ unmap_low_page(pmd);
+ __flush_tlb_all();
continue;
}
/*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
-{
- pud_t *pud;
-
- pud = (pud_t *)pgd_page_vaddr(*pgd);
-
- return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
next = end;
if (pgd_val(*pgd)) {
- last_map_addr = phys_pud_update(pgd, __pa(start),
+ pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
+ unmap_low_page(pud);
continue;
}
@@ -612,10 +603,9 @@ kernel_physical_mapping_init(unsigned long start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
- memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, 0, max_pfn);
}
#endif
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a3..9559d360fde 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
/*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
*/
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+ int rr, i;
+
+ rr = first_node(node_online_map);
+ for (i = 0; i < nr_cpu_ids; i++) {
+ if (early_cpu_to_node(i) != NUMA_NO_NODE)
+ continue;
+ numa_set_node(i, rr);
+ rr = next_node(rr, node_online_map);
+ if (rr == MAX_NUMNODES)
+ rr = first_node(node_online_map);
+ }
+}
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+ int cpu;
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+ BUG_ON(cpu_to_apicid == NULL);
+
+ for_each_possible_cpu(cpu) {
+ int node = numa_cpu_node(cpu);
+
+ if (node == NUMA_NO_NODE)
+ continue;
+ if (!node_online(node))
+ node = find_near_online_node(node);
+ numa_set_node(cpu, node);
+ }
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+int __cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!cpu_possible(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ if (node == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return NULL;
+ }
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
+
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f27..bde3906420d 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+ return apic->x86_32_numa_cpu_node(cpu);
+}
+
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
int nid;
long kva_target_pfn;
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
*/
get_memcfg_numa();
+ numa_init_array();
kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eeb..9ec0f209a6a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -13,31 +13,30 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
+#include <linux/acpi.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/numa.h>
#include <asm/acpi.h>
#include <asm/amd_nb.h>
+#include "numa_internal.h"
+
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-struct memnode memnode;
+nodemask_t numa_nodes_parsed __initdata;
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
+struct memnode memnode;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+static struct numa_meminfo numa_meminfo __initdata;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
/*
* Given a shift value, try to populate memnodemap[]
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
* 0 if memnodmap[] too small (of shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __init populate_memnodemap(const struct bootnode *nodes,
- int numnodes, int shift, int *nodeids)
+static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
{
unsigned long addr, end;
int i, res = -1;
memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
- for (i = 0; i < numnodes; i++) {
- addr = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ addr = mi->blk[i].start;
+ end = mi->blk[i].end;
if (addr >= end)
continue;
if ((end >> shift) >= memnodemapsize)
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
do {
if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
-
- if (!nodeids)
- memnodemap[addr >> shift] = i;
- else
- memnodemap[addr >> shift] = nodeids[i];
-
+ memnodemap[addr >> shift] = mi->blk[i].nid;
addr += (1UL << shift);
} while (addr < end);
res = 1;
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
- nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
+ nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void)
* The LSB of all start and end addresses in the node map is the value of the
* maximum possible shift.
*/
-static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
- int numnodes)
+static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
{
int i, nodes_used = 0;
unsigned long start, end;
unsigned long bitfield = 0, memtop = 0;
- for (i = 0; i < numnodes; i++) {
- start = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ start = mi->blk[i].start;
+ end = mi->blk[i].end;
if (start >= end)
continue;
bitfield |= start;
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
return i;
}
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
- int *nodeids)
+static int __init compute_hash_shift(const struct numa_meminfo *mi)
{
int shift;
- shift = extract_lsb_from_nodes(nodes, numnodes);
+ shift = extract_lsb_from_nodes(mi);
if (allocate_cachealigned_memnodemap())
return -1;
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
- if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
+ if (populate_memnodemap(mi, shift) != 1) {
printk(KERN_INFO "Your memory is not aligned you need to "
"rebuild your kernel with a bigger NODEMAPSIZE "
"shift=%d\n", shift);
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
return NULL;
}
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+ struct numa_meminfo *mi)
+{
+ /* ignore zero length blks */
+ if (start == end)
+ return 0;
+
+ /* whine about and ignore invalid blks */
+ if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+ pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+ nid, start, end);
+ return 0;
+ }
+
+ if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: too many memblk ranges\n");
+ return -EINVAL;
+ }
+
+ mi->blk[mi->nr_blks].start = start;
+ mi->blk[mi->nr_blks].end = end;
+ mi->blk[mi->nr_blks].nid = nid;
+ mi->nr_blks++;
+ return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+ mi->nr_blks--;
+ memmove(&mi->blk[idx], &mi->blk[idx + 1],
+ (mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+ return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
@@ -234,696 +282,386 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
node_set_online(nodeid);
}
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unncessary memblks. Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
-void __init numa_init_array(void)
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
- int rr, i;
+ const u64 low = 0;
+ const u64 high = (u64)max_pfn << PAGE_SHIFT;
+ int i, j, k;
- rr = first_node(node_online_map);
- for (i = 0; i < nr_cpu_ids; i++) {
- if (early_cpu_to_node(i) != NUMA_NO_NODE)
- continue;
- numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- }
-}
-
-#ifdef CONFIG_NUMA_EMU
-/* Numa emulation */
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
-static char *cmdline __initdata;
+ for (i = 0; i < mi->nr_blks; i++) {
+ struct numa_memblk *bi = &mi->blk[i];
-void __init numa_emu_cmdline(char *str)
-{
- cmdline = str;
-}
+ /* make sure all blocks are inside the limits */
+ bi->start = max(bi->start, low);
+ bi->end = min(bi->end, high);
-static int __init setup_physnodes(unsigned long start, unsigned long end,
- int acpi, int amd)
-{
- int ret = 0;
- int i;
-
- memset(physnodes, 0, sizeof(physnodes));
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_get_nodes(physnodes, start, end);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_get_nodes(physnodes);
-#endif
- /*
- * Basic sanity checking on the physical node map: there may be errors
- * if the SRAT or AMD code incorrectly reported the topology or the mem=
- * kernel parameter is used.
- */
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- if (physnodes[i].start > end) {
- physnodes[i].end = physnodes[i].start;
- continue;
- }
- if (physnodes[i].end < start) {
- physnodes[i].start = physnodes[i].end;
+ /* and there's no empty block */
+ if (bi->start == bi->end) {
+ numa_remove_memblk_from(i--, mi);
continue;
}
- if (physnodes[i].start < start)
- physnodes[i].start = start;
- if (physnodes[i].end > end)
- physnodes[i].end = end;
- ret++;
- }
-
- /*
- * If no physical topology was detected, a single node is faked to cover
- * the entire address space.
- */
- if (!ret) {
- physnodes[ret].start = start;
- physnodes[ret].end = end;
- ret = 1;
- }
- return ret;
-}
-
-static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
-{
- int i;
-
- BUG_ON(acpi && amd);
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_fake_nodes(nodes, nr_nodes);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_fake_nodes(nodes, nr_nodes);
-#endif
- if (!acpi && !amd)
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
-}
-
-/*
- * Setups up nid to range from addr to addr + size. If the end
- * boundary is greater than max_addr, then max_addr is used instead.
- * The return value is 0 if there is additional memory left for
- * allocation past addr and -1 otherwise. addr is adjusted to be at
- * the end of the node.
- */
-static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
-{
- int ret = 0;
- nodes[nid].start = *addr;
- *addr += size;
- if (*addr >= max_addr) {
- *addr = max_addr;
- ret = -1;
- }
- nodes[nid].end = *addr;
- node_set(nid, node_possible_map);
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- nodes[nid].start, nodes[nid].end,
- (nodes[nid].end - nodes[nid].start) >> 20);
- return ret;
-}
-
-/*
- * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr. The return value is the number of nodes allocated.
- */
-static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
-{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 size;
- int big;
- int ret = 0;
- int i;
-
- if (nr_nodes <= 0)
- return -1;
- if (nr_nodes > MAX_NUMNODES) {
- pr_info("numa=fake=%d too large, reducing to %d\n",
- nr_nodes, MAX_NUMNODES);
- nr_nodes = MAX_NUMNODES;
- }
-
- size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the remainder.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- pr_err("Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
-
- /*
- * Continue to fill physical nodes with fake nodes until there is no
- * memory left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 end = physnodes[i].start + size;
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
-
- if (ret < big)
- end += FAKE_NODE_MIN_SIZE;
+ for (j = i + 1; j < mi->nr_blks; j++) {
+ struct numa_memblk *bj = &mi->blk[j];
+ unsigned long start, end;
/*
- * Continue to add memory to this fake node if its
- * non-reserved memory is less than the per-node size.
+ * See whether there are overlapping blocks. Whine
+ * about but allow overlaps of the same nid. They
+ * will be merged below.
*/
- while (end - physnodes[i].start -
- memblock_x86_hole_size(physnodes[i].start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > physnodes[i].end) {
- end = physnodes[i].end;
- break;
+ if (bi->end > bj->start && bi->start < bj->end) {
+ if (bi->nid != bj->nid) {
+ pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->nid, bj->start, bj->end);
+ return -EINVAL;
}
+ pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->start, bj->end);
}
/*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
+ * Join together blocks on the same node, holes
+ * between which don't overlap with memory on other
+ * nodes.
*/
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
-
- /*
- * Avoid allocating more nodes than requested, which can
- * happen as a result of rounding down each node's size
- * to FAKE_NODE_MIN_SIZE.
- */
- if (nodes_weight(physnode_mask) + ret >= nr_nodes)
- end = physnodes[i].end;
-
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
+ if (bi->nid != bj->nid)
+ continue;
+ start = max(min(bi->start, bj->start), low);
+ end = min(max(bi->end, bj->end), high);
+ for (k = 0; k < mi->nr_blks; k++) {
+ struct numa_memblk *bk = &mi->blk[k];
+
+ if (bi->nid == bk->nid)
+ continue;
+ if (start < bk->end && end > bk->start)
+ break;
+ }
+ if (k < mi->nr_blks)
+ continue;
+ printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+ bi->nid, bi->start, bi->end, bj->start, bj->end,
+ start, end);
+ bi->start = start;
+ bi->end = end;
+ numa_remove_memblk_from(j--, mi);
}
}
- return ret;
-}
-/*
- * Returns the end address of a node so that there is at least `size' amount of
- * non-reserved memory or `max_addr' is reached.
- */
-static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-{
- u64 end = start + size;
-
- while (end - start - memblock_x86_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
+ for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+ mi->blk[i].start = mi->blk[i].end = 0;
+ mi->blk[i].nid = NUMA_NO_NODE;
}
- return end;
+
+ return 0;
}
/*
- * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ * Set nodes, which have memory in @mi, in *@nodemask.
*/
-static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+ const struct numa_meminfo *mi)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 min_size;
- int ret = 0;
int i;
- if (!size)
- return -1;
- /*
- * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
- * increased accordingly if the requested size is too small. This
- * creates a uniform distribution of node sizes across the entire
- * machine (but not necessarily over physical nodes).
- */
- min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
- MAX_NUMNODES;
- min_size = max(min_size, FAKE_NODE_MIN_SIZE);
- if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
- min_size = (min_size + FAKE_NODE_MIN_SIZE) &
- FAKE_NODE_MIN_HASH_MASK;
- if (size < min_size) {
- pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
- size >> 20, min_size >> 20);
- size = min_size;
- }
- size &= FAKE_NODE_MIN_HASH_MASK;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
- /*
- * Fill physical nodes with fake nodes of size until there is no memory
- * left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
- u64 end;
-
- end = find_end_of_node(physnodes[i].start,
- physnodes[i].end, size);
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
+ for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+ if (mi->blk[i].start != mi->blk[i].end &&
+ mi->blk[i].nid != NUMA_NO_NODE)
+ node_set(mi->blk[i].nid, *nodemask);
+}
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed. The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+ size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
- /*
- * Setup the fake node that will be allocated as bootmem
- * later. If setup_node_range() returns non-zero, there
- * is no more memory available on this physical node.
- */
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
- }
- }
- return ret;
+ /* numa_distance could be 1LU marking allocation failure, test cnt */
+ if (numa_distance_cnt)
+ memblock_x86_free_range(__pa(numa_distance),
+ __pa(numa_distance) + size);
+ numa_distance_cnt = 0;
+ numa_distance = NULL; /* enable table creation */
}
-/*
- * Sets up the system RAM area from start_pfn to last_pfn according to the
- * numa=fake command-line option.
- */
-static int __init numa_emulation(unsigned long start_pfn,
- unsigned long last_pfn, int acpi, int amd)
+static int __init numa_alloc_distance(void)
{
- u64 addr = start_pfn << PAGE_SHIFT;
- u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_nodes;
- int i;
+ nodemask_t nodes_parsed;
+ size_t size;
+ int i, j, cnt = 0;
+ u64 phys;
- /*
- * If the numa=fake command-line contains a 'M' or 'G', it represents
- * the fixed node size. Otherwise, if it is just a single number N,
- * split the system RAM into N fake nodes.
- */
- if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
- u64 size;
+ /* size the new table and allocate it */
+ nodes_parsed = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
- size = memparse(cmdline, &cmdline);
- num_nodes = split_nodes_size_interleave(addr, max_addr, size);
- } else {
- unsigned long n;
+ for_each_node_mask(i, nodes_parsed)
+ cnt = i;
+ cnt++;
+ size = cnt * cnt * sizeof(numa_distance[0]);
- n = simple_strtoul(cmdline, NULL, 0);
- num_nodes = split_nodes_interleave(addr, max_addr, n);
+ phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
+ size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ /* don't retry until explicitly reset */
+ numa_distance = (void *)1LU;
+ return -ENOMEM;
}
+ memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
- if (num_nodes < 0)
- return num_nodes;
- memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
- "disabled.\n");
- return -1;
- }
+ numa_distance = __va(phys);
+ numa_distance_cnt = cnt;
+
+ /* fill with the default distances */
+ for (i = 0; i < cnt; i++)
+ for (j = 0; j < cnt; j++)
+ numa_distance[i * cnt + j] = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
- /*
- * We need to vacate all active ranges that may have been registered for
- * the e820 memory map.
- */
- remove_all_active_ranges();
- for_each_node_mask(i, node_possible_map) {
- memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- setup_physnodes(addr, max_addr, acpi, amd);
- fake_physnodes(acpi, amd, num_nodes);
- numa_init_array();
return 0;
}
-#endif /* CONFIG_NUMA_EMU */
-void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
- int acpi, int amd)
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance. If distance table
+ * doesn't exist, one which is large enough to accomodate all the currently
+ * known nodes will be created.
+ *
+ * If such table cannot be allocated, a warning is printed and further
+ * calls are ignored until the distance table is reset with
+ * numa_reset_distance().
+ *
+ * If @from or @to is higher than the highest known node at the time of
+ * table creation or @distance doesn't make sense, the call is ignored.
+ * This is to allow simplification of specific NUMA config implementations.
+ */
+void __init numa_set_distance(int from, int to, int distance)
{
- int i;
-
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-
-#ifdef CONFIG_NUMA_EMU
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
+ if (!numa_distance && numa_alloc_distance() < 0)
return;
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT))
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+ printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
+ }
-#ifdef CONFIG_AMD_NUMA
- if (!numa_off && amd && !amd_scan_nodes())
+ if ((u8)distance != distance ||
+ (from == to && distance != LOCAL_DISTANCE)) {
+ pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ }
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
- start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
- memnodemap = memnode.embedded_map;
- memnodemap[0] = 0;
- node_set_online(0);
- node_set(0, node_possible_map);
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
- memblock_x86_register_active_regions(0, start_pfn, last_pfn);
- setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
+ numa_distance[from * numa_distance_cnt + to] = distance;
}
-unsigned long __init numa_free_all_bootmem(void)
+int __node_distance(int from, int to)
{
- unsigned long pages = 0;
- int i;
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+ return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
- for_each_online_node(i)
- pages += free_all_bootmem_node(NODE_DATA(i));
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common). Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+{
+ unsigned long numaram, e820ram;
+ int i;
- pages += free_all_memory_core_early(MAX_NUMNODES);
+ numaram = 0;
+ for (i = 0; i < mi->nr_blks; i++) {
+ unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+ unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
+ numaram += e - s;
+ numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+ if ((long)numaram < 0)
+ numaram = 0;
+ }
- return pages;
+ e820ram = max_pfn - (memblock_x86_hole_size(0,
+ max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+ /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+ if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+ printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+ (numaram << PAGE_SHIFT) >> 20,
+ (e820ram << PAGE_SHIFT) >> 20);
+ return false;
+ }
+ return true;
}
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
+static int __init numa_register_memblks(struct numa_meminfo *mi)
{
- int n, val;
- int min_val = INT_MAX;
- int best_node = -1;
+ int i, nid;
- for_each_online_node(n) {
- val = node_distance(node, n);
+ /* Account for nodes with cpus and no memory */
+ node_possible_map = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&node_possible_map, mi);
+ if (WARN_ON(nodes_empty(node_possible_map)))
+ return -EINVAL;
+
+ memnode_shift = compute_hash_shift(mi);
+ if (memnode_shift < 0) {
+ printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
+ return -EINVAL;
+ }
- if (val < min_val) {
- min_val = val;
- best_node = n;
+ for (i = 0; i < mi->nr_blks; i++)
+ memblock_x86_register_active_regions(mi->blk[i].nid,
+ mi->blk[i].start >> PAGE_SHIFT,
+ mi->blk[i].end >> PAGE_SHIFT);
+
+ /* for out of order entries */
+ sort_node_map();
+ if (!numa_meminfo_cover_memory(mi))
+ return -EINVAL;
+
+ /* Finally register nodes. */
+ for_each_node_mask(nid, node_possible_map) {
+ u64 start = (u64)max_pfn << PAGE_SHIFT;
+ u64 end = 0;
+
+ for (i = 0; i < mi->nr_blks; i++) {
+ if (nid != mi->blk[i].nid)
+ continue;
+ start = min(mi->blk[i].start, start);
+ end = max(mi->blk[i].end, end);
}
+
+ if (start < end)
+ setup_node_bootmem(nid, start, end);
}
- return best_node;
+ return 0;
}
-/*
- * Setup early cpu_to_node.
+/**
+ * dummy_numma_init - Fallback dummy NUMA init
*
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
*
- * Called before the per_cpu areas are setup.
+ * Must online at least one node and add memory blocks that cover all
+ * allowed memory. This function must not fail.
*/
-void __init init_cpu_to_node(void)
+static int __init dummy_numa_init(void)
{
- int cpu;
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
- BUG_ON(cpu_to_apicid == NULL);
+ printk(KERN_INFO "%s\n",
+ numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+ 0LU, max_pfn << PAGE_SHIFT);
- for_each_possible_cpu(cpu) {
- int node;
- u16 apicid = cpu_to_apicid[cpu];
+ node_set(0, numa_nodes_parsed);
+ numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
- if (apicid == BAD_APICID)
- continue;
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- node = find_near_online_node(node);
- numa_set_node(cpu, node);
- }
+ return 0;
}
-#endif
-
-void __cpuinit numa_set_node(int cpu, int node)
+static int __init numa_init(int (*init_func)(void))
{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
- /* early setting, no percpu area yet */
- if (cpu_to_node_map) {
- cpu_to_node_map[cpu] = node;
- return;
- }
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
- printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
- dump_stack();
- return;
- }
-#endif
- per_cpu(x86_cpu_to_node_map, cpu) = node;
+ int i;
+ int ret;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
-}
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, NUMA_NO_NODE);
-void __cpuinit numa_clear_node(int cpu)
-{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
+ nodes_clear(numa_nodes_parsed);
+ nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
+ memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+ remove_all_active_ranges();
+ numa_reset_distance();
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+ ret = init_func();
+ if (ret < 0)
+ return ret;
+ ret = numa_cleanup_meminfo(&numa_meminfo);
+ if (ret < 0)
+ return ret;
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
+ numa_emulation(&numa_meminfo, numa_distance_cnt);
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
-void __cpuinit numa_add_cpu(int cpu)
-{
- unsigned long addr;
- u16 apicid;
- int physnid;
- int nid = NUMA_NO_NODE;
+ ret = numa_register_memblks(&numa_meminfo);
+ if (ret < 0)
+ return ret;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
- if (nid == NUMA_NO_NODE)
- nid = early_cpu_to_node(cpu);
- BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
-
- /*
- * Use the starting address of the emulated node to find which physical
- * node it is allocated on.
- */
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- break;
+ for (i = 0; i < nr_cpu_ids; i++) {
+ int nid = early_cpu_to_node(i);
- /*
- * Map the cpu to each emulated node that is allocated on the physical
- * node of the cpu's apic id.
- */
- for_each_online_node(nid) {
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+ if (nid == NUMA_NO_NODE)
+ continue;
+ if (!node_online(nid))
+ numa_clear_node(i);
}
+ numa_init_array();
+ return 0;
}
-void __cpuinit numa_remove_cpu(int cpu)
+void __init initmem_init(void)
{
- int i;
+ int ret;
- for_each_online_node(i)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
-}
-#endif /* !CONFIG_NUMA_EMU */
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
- char buf[64];
-
- mask = node_to_cpumask_map[node];
- if (!mask) {
- pr_err("node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
- return NULL;
+ if (!numa_off) {
+#ifdef CONFIG_ACPI_NUMA
+ ret = numa_init(x86_acpi_numa_init);
+ if (!ret)
+ return;
+#endif
+#ifdef CONFIG_AMD_NUMA
+ ret = numa_init(amd_numa_init);
+ if (!ret)
+ return;
+#endif
}
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu",
- cpu, node, buf);
- return mask;
+ numa_init(dummy_numa_init);
}
-/*
- * --------- debug versions of the numa functions ---------
- */
-#ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- struct cpumask *mask;
-
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
-}
-#else
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+unsigned long __init numa_free_all_bootmem(void)
{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
+ unsigned long pages = 0;
int i;
- for_each_online_node(i) {
- unsigned long addr;
-
- addr = node_start_pfn(i) << PAGE_SHIFT;
- if (addr < physnodes[node].start ||
- addr >= physnodes[node].end)
- continue;
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
- }
-}
-#endif /* CONFIG_NUMA_EMU */
+ for_each_online_node(i)
+ pages += free_all_bootmem_node(NODE_DATA(i));
-void __cpuinit numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 1);
-}
+ pages += free_all_memory_core_early(MAX_NUMNODES);
-void __cpuinit numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 0);
+ return pages;
}
-int __cpu_to_node(int cpu)
+int __cpuinit numa_cpu_node(int cpu)
{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!cpu_possible(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
}
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
new file mode 100644
index 00000000000..ad091e4cff1
--- /dev/null
+++ b/arch/x86/mm/numa_emulation.c
@@ -0,0 +1,494 @@
+/*
+ * NUMA emulation
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/topology.h>
+#include <linux/memblock.h>
+#include <asm/dma.h>
+
+#include "numa_internal.h"
+
+static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
+static char *emu_cmdline __initdata;
+
+void __init numa_emu_cmdline(char *str)
+{
+ emu_cmdline = str;
+}
+
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+ int i;
+
+ for (i = 0; i < mi->nr_blks; i++)
+ if (mi->blk[i].nid == nid)
+ return i;
+ return -ENOENT;
+}
+
+/*
+ * Sets up nid to range from @start to @end. The return value is -errno if
+ * something went wrong, 0 otherwise.
+ */
+static int __init emu_setup_memblk(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ int nid, int phys_blk, u64 size)
+{
+ struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+ struct numa_memblk *pb = &pi->blk[phys_blk];
+
+ if (ei->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: Too many emulated memblks, failing emulation\n");
+ return -EINVAL;
+ }
+
+ ei->nr_blks++;
+ eb->start = pb->start;
+ eb->end = pb->start + size;
+ eb->nid = nid;
+
+ if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
+ emu_nid_to_phys[nid] = pb->nid;
+
+ pb->start += size;
+ if (pb->start >= pb->end) {
+ WARN_ON_ONCE(pb->start > pb->end);
+ numa_remove_memblk_from(phys_blk, pi);
+ }
+
+ printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
+ eb->start, eb->end, (eb->end - eb->start) >> 20);
+ return 0;
+}
+
+/*
+ * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
+ * to max_addr. The return value is the number of nodes allocated.
+ */
+static int __init split_nodes_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, int nr_nodes)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 size;
+ int big;
+ int nid = 0;
+ int i, ret;
+
+ if (nr_nodes <= 0)
+ return -1;
+ if (nr_nodes > MAX_NUMNODES) {
+ pr_info("numa=fake=%d too large, reducing to %d\n",
+ nr_nodes, MAX_NUMNODES);
+ nr_nodes = MAX_NUMNODES;
+ }
+
+ size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+ /*
+ * Calculate the number of big nodes that can be allocated as a result
+ * of consolidating the remainder.
+ */
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
+ FAKE_NODE_MIN_SIZE;
+
+ size &= FAKE_NODE_MIN_HASH_MASK;
+ if (!size) {
+ pr_err("Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
+ return -1;
+ }
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Continue to fill physical nodes with fake nodes until there is no
+ * memory left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+ end = start + size;
+
+ if (nid < big)
+ end += FAKE_NODE_MIN_SIZE;
+
+ /*
+ * Continue to add memory to this fake node if its
+ * non-reserved memory is less than the per-node size.
+ */
+ while (end - start -
+ memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > limit) {
+ end = limit;
+ break;
+ }
+ }
+
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Returns the end address of a node so that there is at least `size' amount of
+ * non-reserved memory or `max_addr' is reached.
+ */
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
+{
+ u64 end = start + size;
+
+ while (end - start - memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > max_addr) {
+ end = max_addr;
+ break;
+ }
+ }
+ return end;
+}
+
+/*
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
+ * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ */
+static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, u64 size)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 min_size;
+ int nid = 0;
+ int i, ret;
+
+ if (!size)
+ return -1;
+ /*
+ * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+ * increased accordingly if the requested size is too small. This
+ * creates a uniform distribution of node sizes across the entire
+ * machine (but not necessarily over physical nodes).
+ */
+ min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
+ MAX_NUMNODES;
+ min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+ if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+ min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+ FAKE_NODE_MIN_HASH_MASK;
+ if (size < min_size) {
+ pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+ size >> 20, min_size >> 20);
+ size = min_size;
+ }
+ size &= FAKE_NODE_MIN_HASH_MASK;
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Fill physical nodes with fake nodes of size until there is no memory
+ * left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+
+ end = find_end_of_node(start, limit, size);
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/**
+ * numa_emulation - Emulate NUMA nodes
+ * @numa_meminfo: NUMA configuration to massage
+ * @numa_dist_cnt: The size of the physical NUMA distance table
+ *
+ * Emulate NUMA nodes according to the numa=fake kernel parameter.
+ * @numa_meminfo contains the physical memory configuration and is modified
+ * to reflect the emulated configuration on success. @numa_dist_cnt is
+ * used to determine the size of the physical distance table.
+ *
+ * On success, the following modifications are made.
+ *
+ * - @numa_meminfo is updated to reflect the emulated nodes.
+ *
+ * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
+ * emulated nodes.
+ *
+ * - NUMA distance table is rebuilt to represent distances between emulated
+ * nodes. The distances are determined considering how emulated nodes
+ * are mapped to physical nodes and match the actual distances.
+ *
+ * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
+ * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
+ *
+ * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
+ * identity mapping and no other modification is made.
+ */
+void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
+{
+ static struct numa_meminfo ei __initdata;
+ static struct numa_meminfo pi __initdata;
+ const u64 max_addr = max_pfn << PAGE_SHIFT;
+ u8 *phys_dist = NULL;
+ size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
+ int max_emu_nid, dfl_phys_nid;
+ int i, j, ret;
+
+ if (!emu_cmdline)
+ goto no_emu;
+
+ memset(&ei, 0, sizeof(ei));
+ pi = *numa_meminfo;
+
+ for (i = 0; i < MAX_NUMNODES; i++)
+ emu_nid_to_phys[i] = NUMA_NO_NODE;
+
+ /*
+ * If the numa=fake command-line contains a 'M' or 'G', it represents
+ * the fixed node size. Otherwise, if it is just a single number N,
+ * split the system RAM into N fake nodes.
+ */
+ if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
+ u64 size;
+
+ size = memparse(emu_cmdline, &emu_cmdline);
+ ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
+ } else {
+ unsigned long n;
+
+ n = simple_strtoul(emu_cmdline, NULL, 0);
+ ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
+ }
+
+ if (ret < 0)
+ goto no_emu;
+
+ if (numa_cleanup_meminfo(&ei) < 0) {
+ pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+ goto no_emu;
+ }
+
+ /* copy the physical distance table */
+ if (numa_dist_cnt) {
+ u64 phys;
+
+ phys = memblock_find_in_range(0,
+ (u64)max_pfn_mapped << PAGE_SHIFT,
+ phys_size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+ goto no_emu;
+ }
+ memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
+ phys_dist = __va(phys);
+
+ for (i = 0; i < numa_dist_cnt; i++)
+ for (j = 0; j < numa_dist_cnt; j++)
+ phys_dist[i * numa_dist_cnt + j] =
+ node_distance(i, j);
+ }
+
+ /*
+ * Determine the max emulated nid and the default phys nid to use
+ * for unmapped nodes.
+ */
+ max_emu_nid = 0;
+ dfl_phys_nid = NUMA_NO_NODE;
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
+ if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
+ max_emu_nid = i;
+ if (dfl_phys_nid == NUMA_NO_NODE)
+ dfl_phys_nid = emu_nid_to_phys[i];
+ }
+ }
+ if (dfl_phys_nid == NUMA_NO_NODE) {
+ pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
+ goto no_emu;
+ }
+
+ /* commit */
+ *numa_meminfo = ei;
+
+ /*
+ * Transform __apicid_to_node table to use emulated nids by
+ * reverse-mapping phys_nid. The maps should always exist but fall
+ * back to zero just in case.
+ */
+ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
+ if (__apicid_to_node[i] == NUMA_NO_NODE)
+ continue;
+ for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
+ if (__apicid_to_node[i] == emu_nid_to_phys[j])
+ break;
+ __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
+ }
+
+ /* make sure all emulated nodes are mapped to a physical node */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ if (emu_nid_to_phys[i] == NUMA_NO_NODE)
+ emu_nid_to_phys[i] = dfl_phys_nid;
+
+ /* transform distance table */
+ numa_reset_distance();
+ for (i = 0; i < max_emu_nid + 1; i++) {
+ for (j = 0; j < max_emu_nid + 1; j++) {
+ int physi = emu_nid_to_phys[i];
+ int physj = emu_nid_to_phys[j];
+ int dist;
+
+ if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
+ dist = physi == physj ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ else
+ dist = phys_dist[physi * numa_dist_cnt + physj];
+
+ numa_set_distance(i, j, dist);
+ }
+ }
+
+ /* free the copied physical distance table */
+ if (phys_dist)
+ memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
+ return;
+
+no_emu:
+ /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ emu_nid_to_phys[i] = i;
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+void __cpuinit numa_add_cpu(int cpu)
+{
+ int physnid, nid;
+
+ nid = early_cpu_to_node(cpu);
+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+ physnid = emu_nid_to_phys[nid];
+
+ /*
+ * Map the cpu to each emulated node that is allocated on the physical
+ * node of the cpu's apic id.
+ */
+ for_each_online_node(nid)
+ if (emu_nid_to_phys[nid] == physnid)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ int i;
+
+ for_each_online_node(i)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+ int nid, physnid, i;
+
+ nid = early_cpu_to_node(cpu);
+ if (nid == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return;
+ }
+
+ physnid = emu_nid_to_phys[nid];
+
+ for_each_online_node(i) {
+ if (emu_nid_to_phys[nid] != physnid)
+ continue;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+ }
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
new file mode 100644
index 00000000000..ef2d97377d7
--- /dev/null
+++ b/arch/x86/mm/numa_internal.h
@@ -0,0 +1,31 @@
+#ifndef __X86_MM_NUMA_INTERNAL_H
+#define __X86_MM_NUMA_INTERNAL_H
+
+#include <linux/types.h>
+#include <asm/numa.h>
+
+struct numa_memblk {
+ u64 start;
+ u64 end;
+ int nid;
+};
+
+struct numa_meminfo {
+ int nr_blks;
+ struct numa_memblk blk[NR_NODE_MEMBLKS];
+};
+
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
+void __init numa_reset_distance(void);
+
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt);
+#else
+static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt)
+{ }
+#endif
+
+#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3..90825f2eb0f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
void update_page_count(int level, unsigned long pages)
{
- unsigned long flags;
-
/* Protect against CPA */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
direct_pages_count[level] += pages;
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
if (cpa->force_split)
return 1;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
out_unlock:
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return do_split;
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
- unsigned long flags, pfn, pfninc = 1;
+ unsigned long pfn, pfninc = 1;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -591,7 +589,7 @@ out_unlock:
*/
if (base)
__free_page(base);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96..0113d19c8aa 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
static void pgd_dtor(pgd_t *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- unsigned long flags;
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
* respect to anything walking the pgd_list, so that they
* never see a partially populated pgd.
*/
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return pgd;
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051..48651c6f657 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
int acpi_numa __initdata;
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
num_memory_chunks);
- for (i = 0; i < MAX_APICID; i++)
- apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1da..8e9d3394f6d 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,88 +26,34 @@
int acpi_numa __initdata;
-static struct acpi_table_slit *acpi_slit;
-
-static nodemask_t nodes_parsed __initdata;
-static nodemask_t cpu_nodes_parsed __initdata;
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];
-static int num_node_memblks __initdata;
-static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
-static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
-
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
}
-static __init int conflicting_memblks(unsigned long start, unsigned long end)
-{
- int i;
- for (i = 0; i < num_node_memblks; i++) {
- struct bootnode *nd = &node_memblk_range[i];
- if (nd->start == nd->end)
- continue;
- if (nd->end > start && nd->start < end)
- return memblk_nodeid[i];
- if (nd->end == end && nd->start == start)
- return memblk_nodeid[i];
- }
- return -1;
-}
-
-static __init void cutoff_node(int i, unsigned long start, unsigned long end)
-{
- struct bootnode *nd = &nodes[i];
-
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
static __init void bad_srat(void)
{
- int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
- for (i = 0; i < MAX_NUMNODES; i++) {
- nodes[i].start = nodes[i].end = 0;
- nodes_add[i].start = nodes_add[i].end = 0;
- }
- remove_all_active_ranges();
+ memset(nodes_add, 0, sizeof(nodes_add));
}
static __init inline int srat_disabled(void)
{
- return numa_off || acpi_numa < 0;
+ return acpi_numa < 0;
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
- unsigned length;
- unsigned long phys;
-
- length = slit->header.length;
- phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
- PAGE_SIZE);
-
- if (phys == MEMBLOCK_ERROR)
- panic(" Can not save slit!\n");
+ int i, j;
- acpi_slit = __va(phys);
- memcpy(acpi_slit, slit, length);
- memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
+ for (i = 0; i < slit->locality_count; i++)
+ for (j = 0; j < slit->locality_count; j++)
+ numa_set_distance(pxm_to_node(i), pxm_to_node(j),
+ slit->entry[slit->locality_count * i + j]);
}
/* Callback for Proximity Domain -> x2APIC mapping */
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
}
if (changed) {
- node_set(node, cpu_nodes_parsed);
+ node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
nd->start, nd->end);
}
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
- struct bootnode *nd, oldnode;
unsigned long start, end;
int node, pxm;
- int i;
if (srat_disabled())
return;
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
bad_srat();
return;
}
- i = conflicting_memblks(start, end);
- if (i == node) {
- printk(KERN_WARNING
- "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
- pxm, start, end, nodes[i].start, nodes[i].end);
- } else if (i >= 0) {
- printk(KERN_ERR
- "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
- pxm, start, end, node_to_pxm(i),
- nodes[i].start, nodes[i].end);
+
+ if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
return;
}
- nd = &nodes[node];
- oldnode = *nd;
- if (!node_test_and_set(node, nodes_parsed)) {
- nd->start = start;
- nd->end = end;
- } else {
- if (start < nd->start)
- nd->start = start;
- if (nd->end < end)
- nd->end = end;
- }
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+ if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
update_nodes_add(node, start, end);
- /* restore nodes[node] */
- *nd = oldnode;
- if ((nd->start | nd->end) == 0)
- node_clear(node, nodes_parsed);
- }
-
- node_memblk_range[num_node_memblks].start = start;
- node_memblk_range[num_node_memblks].end = end;
- memblk_nodeid[num_node_memblks] = node;
- num_node_memblks++;
-}
-
-/* Sanity check to catch more bad SRATs (they are amazingly common).
- Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
-{
- int i;
- unsigned long pxmram, e820ram;
-
- pxmram = 0;
- for_each_node_mask(i, nodes_parsed) {
- unsigned long s = nodes[i].start >> PAGE_SHIFT;
- unsigned long e = nodes[i].end >> PAGE_SHIFT;
- pxmram += e - s;
- pxmram -= __absent_pages_in_range(i, s, e);
- if ((long)pxmram < 0)
- pxmram = 0;
- }
-
- e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
- /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
- printk(KERN_ERR
- "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
- (pxmram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
- return 0;
- }
- return 1;
}
void __init acpi_numa_arch_fixup(void) {}
-#ifdef CONFIG_NUMA_EMU
-void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- cutoff_node(i, start, end);
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
- }
-}
-#endif /* CONFIG_NUMA_EMU */
-
-/* Use the information discovered above to actually set up the nodes. */
-int __init acpi_scan_nodes(unsigned long start, unsigned long end)
+int __init x86_acpi_numa_init(void)
{
- int i;
-
- if (acpi_numa <= 0)
- return -1;
-
- /* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++)
- cutoff_node(i, start, end);
-
- /*
- * Join together blocks on the same node, holes between
- * which don't overlap with memory on other nodes.
- */
- for (i = 0; i < num_node_memblks; ++i) {
- int j, k;
-
- for (j = i + 1; j < num_node_memblks; ++j) {
- unsigned long start, end;
-
- if (memblk_nodeid[i] != memblk_nodeid[j])
- continue;
- start = min(node_memblk_range[i].end,
- node_memblk_range[j].end);
- end = max(node_memblk_range[i].start,
- node_memblk_range[j].start);
- for (k = 0; k < num_node_memblks; ++k) {
- if (memblk_nodeid[i] == memblk_nodeid[k])
- continue;
- if (start < node_memblk_range[k].end &&
- end > node_memblk_range[k].start)
- break;
- }
- if (k < num_node_memblks)
- continue;
- start = min(node_memblk_range[i].start,
- node_memblk_range[j].start);
- end = max(node_memblk_range[i].end,
- node_memblk_range[j].end);
- printk(KERN_INFO "SRAT: Node %d "
- "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
- memblk_nodeid[i],
- node_memblk_range[i].start,
- node_memblk_range[i].end,
- node_memblk_range[j].start,
- node_memblk_range[j].end,
- start, end);
- node_memblk_range[i].start = start;
- node_memblk_range[i].end = end;
- k = --num_node_memblks - j;
- memmove(memblk_nodeid + j, memblk_nodeid + j+1,
- k * sizeof(*memblk_nodeid));
- memmove(node_memblk_range + j, node_memblk_range + j+1,
- k * sizeof(*node_memblk_range));
- --j;
- }
- }
-
- memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
- memblk_nodeid);
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
-
- for (i = 0; i < num_node_memblks; i++)
- memblock_x86_register_active_regions(memblk_nodeid[i],
- node_memblk_range[i].start >> PAGE_SHIFT,
- node_memblk_range[i].end >> PAGE_SHIFT);
-
- /* for out of order entries in SRAT */
- sort_node_map();
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
+ int ret;
- /* Account for nodes with cpus and no memory */
- nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
-
- /* Finally register nodes */
- for_each_node_mask(i, node_possible_map)
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- /* Try again in case setup_node_bootmem missed one due
- to missing bootmem */
- for_each_node_mask(i, node_possible_map)
- if (!node_online(i))
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- int node = early_cpu_to_node(i);
-
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- numa_clear_node(i);
- }
- numa_init_array();
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
- [0 ... MAX_NUMNODES-1] = PXM_INVAL
-};
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- /*
- * Find the real node that this emulated node appears on. For
- * the sake of simplicity, we only use a real node's starting
- * address to determine which emulated node it appears on.
- */
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- }
- return ret;
+ ret = acpi_numa_init();
+ if (ret < 0)
+ return ret;
+ return srat_disabled() ? -EINVAL : 0;
}
-/*
- * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
- * mappings that respect the real ACPI topology but reflect our emulated
- * environment. For each emulated node, we find which real node it appears on
- * and create PXM to NID mappings for those fake nodes which mirror that
- * locality. SLIT will now represent the correct distances between emulated
- * nodes as a result of the real topology.
- */
-void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
-{
- int i, j;
-
- for (i = 0; i < num_nodes; i++) {
- int nid, pxm;
-
- nid = find_node_by_addr(fake_nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
- pxm = node_to_pxm(nid);
- if (pxm == PXM_INVAL)
- continue;
- fake_node_to_pxm_map[i] = pxm;
- /*
- * For each apicid_to_node mapping that exists for this real
- * node, it must now point to the fake node ID.
- */
- for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid &&
- fake_apicid_to_node[j] == NUMA_NO_NODE)
- fake_apicid_to_node[j] = i;
- }
-
- /*
- * If there are apicid-to-node mappings for physical nodes that do not
- * have a corresponding emulated node, it should default to a guaranteed
- * value.
- */
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- if (apicid_to_node[i] != NUMA_NO_NODE &&
- fake_apicid_to_node[i] == NUMA_NO_NODE)
- fake_apicid_to_node[i] = 0;
-
- for (i = 0; i < num_nodes; i++)
- __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-
- nodes_clear(nodes_parsed);
- for (i = 0; i < num_nodes; i++)
- if (fake_nodes[i].start != fake_nodes[i].end)
- node_set(i, nodes_parsed);
-}
-
-static int null_slit_node_compare(int a, int b)
-{
- return node_to_pxm(a) == node_to_pxm(b);
-}
-#else
-static int null_slit_node_compare(int a, int b)
-{
- return a == b;
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __node_distance(int a, int b)
-{
- int index;
-
- if (!acpi_slit)
- return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
- index = acpi_slit->locality_count * node_to_pxm(a);
- return acpi_slit->entry[index + node_to_pxm(b)];
-}
-
-EXPORT_SYMBOL(__node_distance);
-
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8..d6c0418c3e4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
sender = this_cpu_read(tlb_vector_offset);
f = &flush_state[sender];
- /*
- * Could avoid this lock when
- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- * probably not worth checking this for a cache-hot lock.
- */
- raw_spin_lock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = NULL;
f->flush_va = 0;
- raw_spin_unlock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_unlock(&f->tlbstate_lock);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -211,11 +208,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
if (is_uv_system()) {
unsigned int cpu;
- cpu = get_cpu();
+ cpu = smp_processor_id();
cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
if (cpumask)
flush_tlb_others_ipi(cpumask, mm, va);
- put_cpu();
return;
}
flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a..026e4931d16 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
-static void enable_pci_io_ecs(void *unused)
+static void __cpuinit enable_pci_io_ecs(void *unused)
{
u64 reg;
rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e80..67858be4b52 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <asm/ce4100.h>
#include <asm/pci_x86.h>
struct sim_reg {
@@ -254,7 +255,7 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
static int ce4100_conf_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
- int i, retval = 1;
+ int i;
if (bus == 1) {
for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
.write = ce4100_conf_write,
};
-static int __init ce4100_pci_init(void)
+int __init ce4100_pci_init(void)
{
init_sim_regs();
raw_pci_ops = &ce4100_pci_conf;
- return 0;
+ /* Indicate caller that it should invoke pci_legacy_init() */
+ return 1;
}
-subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a07d09..8c4085a95ef 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
#include <asm/xen/pci.h>
#ifdef CONFIG_ACPI
-static int xen_hvm_register_pirq(u32 gsi, int triggering)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
int rc, irq;
struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return -1;
}
- if (triggering == ACPI_EDGE_SENSITIVE) {
+ if (trigger == ACPI_EDGE_SENSITIVE) {
shareable = 0;
name = "ioapic-edge";
} else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return irq;
}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
- int trigger, int polarity)
-{
- return xen_hvm_register_pirq(gsi, trigger);
-}
#endif
#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, pirq, ret = 0;
+ int irq, pirq;
struct msi_desc *msidesc;
struct msi_msg msg;
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__read_msi_msg(msidesc, &msg);
pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
- if (irq < 0)
+ if (msg.data != XEN_PIRQ_MSI_DATA ||
+ xen_irq_from_pirq(pirq) < 0) {
+ pirq = xen_allocate_pirq_msi(dev, msidesc);
+ if (pirq < 0)
goto error;
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
- " pirq=%d\n", irq, pirq);
- return 0;
+ xen_msi_compose_msg(dev, pirq, &msg);
+ __write_msi_msg(msidesc, &msg);
+ dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
+ } else {
+ dev_dbg(&dev->dev,
+ "xen: msi already bound to pirq=%d\n", pirq);
}
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
- if (irq < 0 || pirq < 0)
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (irq < 0)
goto error;
- printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
- xen_msi_compose_msg(dev, pirq, &msg);
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- write_msi_msg(irq, &msg);
+ dev_dbg(&dev->dev,
+ "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
-
- return ret;
+ dev_err(&dev->dev,
+ "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ return -ENODEV;
}
/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return -ENOMEM;
if (type == PCI_CAP_ID_MSIX)
- ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ ret = xen_pci_frontend_enable_msix(dev, v, nvec);
else
- ret = xen_pci_frontend_enable_msi(dev, &v);
+ ret = xen_pci_frontend_enable_msi(dev, v);
if (ret)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_allocate_pirq(v[i], 0, /* not sharable */
- (type == PCI_CAP_ID_MSIX) ?
- "pcifront-msi-x" : "pcifront-msi");
- if (irq < 0) {
- ret = -1;
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" :
+ "pcifront-msi");
+ if (irq < 0)
goto free;
- }
-
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error_while;
i++;
}
kfree(v);
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
free:
kfree(v);
return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
+
+ /* Free the IRQ's and the msidesc using the generic code. */
+ default_teardown_msi_irqs(dev);
}
static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
xen_destroy_irq(irq);
}
+#ifdef CONFIG_XEN_DOM0
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret;
+ int ret = 0;
struct msi_desc *msidesc;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_create_msi_irq(dev, msidesc, type);
- if (irq < 0)
- return -1;
+ struct physdev_map_pirq map_irq;
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error;
- }
- return 0;
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
-error:
- xen_destroy_irq(irq);
+ if (type == PCI_CAP_ID_MSIX) {
+ int pos;
+ u32 table_offset, bir;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (ret) {
+ dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+ goto out;
+ }
+
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
+ map_irq.pirq, map_irq.index,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+out:
return ret;
}
#endif
+#endif
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;
- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }
- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;
- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a717..28071bb31db 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,21 +15,20 @@
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
+#include <asm/ce4100.h>
+#include <asm/prom.h>
#include <asm/setup.h>
+#include <asm/i8259.h>
#include <asm/io.h>
+#include <asm/io_apic.h>
static int ce4100_i8042_detect(void)
{
return 0;
}
-static void __init sdv_find_smp_config(void)
-{
-}
-
#ifdef CONFIG_SERIAL_8250
-
static unsigned int mem_serial_in(struct uart_port *p, int offset)
{
offset = offset << p->regshift;
@@ -118,6 +117,15 @@ static void __init sdv_arch_setup(void)
sdv_serial_fixup();
}
+#ifdef CONFIG_X86_IO_APIC
+static void __cpuinit sdv_pci_init(void)
+{
+ x86_of_pci_init();
+ /* We can't set this earlier, because we need to calibrate the timer */
+ legacy_pic = &null_legacy_pic;
+}
+#endif
+
/*
* CE4100 specific x86_init function overrides and early setup
* calls.
@@ -128,5 +136,11 @@ void __init x86_ce4100_early_setup(void)
x86_platform.i8042_detect = ce4100_i8042_detect;
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.pci.init = ce4100_pci_init;
+
+#ifdef CONFIG_X86_IO_APIC
+ x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
+#endif
}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
new file mode 100644
index 00000000000..dc701ea5854
--- /dev/null
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -0,0 +1,428 @@
+/*
+ * CE4100 on Falcon Falls
+ *
+ * (c) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+/dts-v1/;
+/ {
+ model = "intel,falconfalls";
+ compatible = "intel,falconfalls";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+ };
+
+ soc@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-cp";
+ ranges;
+
+ ioapic1: interrupt-controller@fec00000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0xfec00000 0x1000>;
+ };
+
+ timer@fed00000 {
+ compatible = "intel,ce4100-hpet";
+ reg = <0xfed00000 0x200>;
+ };
+
+ lapic0: interrupt-controller@fee00000 {
+ compatible = "intel,ce4100-lapic";
+ reg = <0xfee00000 0x1000>;
+ };
+
+ pci@3fc {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <0 0>;
+ ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000
+ 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000
+ 0x0000000 0 0x0 0x0 0 0x100>;
+
+ /* Secondary IO-APIC */
+ ioapic2: interrupt-controller@0,1 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0x100 0x0 0x0 0x0 0x0>;
+ assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>;
+ };
+
+ pci@1,0 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <1 1>;
+ ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
+
+ interrupt-parent = <&ioapic2>;
+
+ display@2,0 {
+ compatible = "pci8086,2e5b.2",
+ "pci8086,2e5b",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x11000 0x0 0x0 0x0 0x0>;
+ interrupts = <0 1>;
+ };
+
+ multimedia@3,0 {
+ compatible = "pci8086,2e5c.2",
+ "pci8086,2e5c",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x11800 0x0 0x0 0x0 0x0>;
+ interrupts = <2 1>;
+ };
+
+ multimedia@4,0 {
+ compatible = "pci8086,2e5d.2",
+ "pci8086,2e5d",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12000 0x0 0x0 0x0 0x0>;
+ interrupts = <4 1>;
+ };
+
+ multimedia@4,1 {
+ compatible = "pci8086,2e5e.2",
+ "pci8086,2e5e",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12100 0x0 0x0 0x0 0x0>;
+ interrupts = <5 1>;
+ };
+
+ sound@6,0 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13000 0x0 0x0 0x0 0x0>;
+ interrupts = <6 1>;
+ };
+
+ sound@6,1 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13100 0x0 0x0 0x0 0x0>;
+ interrupts = <7 1>;
+ };
+
+ sound@6,2 {
+ compatible = "pci8086,2e60.2",
+ "pci8086,2e60",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13200 0x0 0x0 0x0 0x0>;
+ interrupts = <8 1>;
+ };
+
+ display@8,0 {
+ compatible = "pci8086,2e61.2",
+ "pci8086,2e61",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14000 0x0 0x0 0x0 0x0>;
+ interrupts = <9 1>;
+ };
+
+ display@8,1 {
+ compatible = "pci8086,2e62.2",
+ "pci8086,2e62",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14100 0x0 0x0 0x0 0x0>;
+ interrupts = <10 1>;
+ };
+
+ multimedia@8,2 {
+ compatible = "pci8086,2e63.2",
+ "pci8086,2e63",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x14200 0x0 0x0 0x0 0x0>;
+ interrupts = <11 1>;
+ };
+
+ entertainment-encryption@9,0 {
+ compatible = "pci8086,2e64.2",
+ "pci8086,2e64",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x14800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 1>;
+ };
+
+ localbus@a,0 {
+ compatible = "pci8086,2e65.2",
+ "pci8086,2e65",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15000 0x0 0x0 0x0 0x0>;
+ };
+
+ serial@b,0 {
+ compatible = "pci8086,2e66.2",
+ "pci8086,2e66",
+ "pciclass070003",
+ "pciclass0700";
+
+ reg = <0x15800 0x0 0x0 0x0 0x0>;
+ interrupts = <14 1>;
+ };
+
+ gpio@b,1 {
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ #gpio-cells = <2>;
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ gpio-controller;
+ };
+
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <0 0 0x100>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
+
+ smard-card@b,3 {
+ compatible = "pci8086,2e69.2",
+ "pci8086,2e69",
+ "pciclass070500",
+ "pciclass0705";
+
+ reg = <0x15b00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ };
+
+ spi-controller@b,4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible =
+ "pci8086,2e6a.2",
+ "pci8086,2e6a",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15c00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+
+ dac@0 {
+ compatible = "ti,pcm1755";
+ reg = <0>;
+ spi-max-frequency = <115200>;
+ };
+
+ dac@1 {
+ compatible = "ti,pcm1609a";
+ reg = <1>;
+ spi-max-frequency = <115200>;
+ };
+
+ eeprom@2 {
+ compatible = "atmel,at93c46";
+ reg = <2>;
+ spi-max-frequency = <115200>;
+ };
+ };
+
+ multimedia@b,7 {
+ compatible = "pci8086,2e6d.2",
+ "pci8086,2e6d",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15f00 0x0 0x0 0x0 0x0>;
+ };
+
+ ethernet@c,0 {
+ compatible = "pci8086,2e6e.2",
+ "pci8086,2e6e",
+ "pciclass020000",
+ "pciclass0200";
+
+ reg = <0x16000 0x0 0x0 0x0 0x0>;
+ interrupts = <21 1>;
+ };
+
+ clock@c,1 {
+ compatible = "pci8086,2e6f.2",
+ "pci8086,2e6f",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x16100 0x0 0x0 0x0 0x0>;
+ interrupts = <3 1>;
+ };
+
+ usb@d,0 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16800 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ usb@d,1 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16900 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ sata@e,0 {
+ compatible = "pci8086,2e71.0",
+ "pci8086,2e71",
+ "pciclass010601",
+ "pciclass0106";
+
+ reg = <0x17000 0x0 0x0 0x0 0x0>;
+ interrupts = <23 3>;
+ };
+
+ flash@f,0 {
+ compatible = "pci8086,701.1",
+ "pci8086,701",
+ "pciclass050100",
+ "pciclass0501";
+
+ reg = <0x17800 0x0 0x0 0x0 0x0>;
+ interrupts = <13 1>;
+ };
+
+ entertainment-encryption@10,0 {
+ compatible = "pci8086,702.1",
+ "pci8086,702",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x18000 0x0 0x0 0x0 0x0>;
+ };
+
+ co-processor@11,0 {
+ compatible = "pci8086,703.1",
+ "pci8086,703",
+ "pciclass0b4000",
+ "pciclass0b40";
+
+ reg = <0x18800 0x0 0x0 0x0 0x0>;
+ interrupts = <1 1>;
+ };
+
+ multimedia@12,0 {
+ compatible = "pci8086,704.0",
+ "pci8086,704",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x19000 0x0 0x0 0x0 0x0>;
+ };
+ };
+
+ isa@1f,0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "isa";
+ ranges = <1 0 0 0 0 0x100>;
+
+ rtc@70 {
+ compatible = "intel,ce4100-rtc", "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ea6529e93c6..5c0207bf959 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
#include <asm/io.h>
#include <asm/i8259.h>
#include <asm/intel_scu_ipc.h>
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void)
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
x86_platform.i8042_detect = mrst_i8042_detect;
+ x86_init.timers.wallclock_init = mrst_rtc_init;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 32cd7edd71a..04cf645feb9 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime)
void __init mrst_rtc_init(void)
{
- unsigned long rtc_paddr;
- void __iomem *virt_base;
+ unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr;
sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
- if (!sfi_mrtc_num)
+ if (!sfi_mrtc_num || !vrtc_paddr)
return;
- rtc_paddr = sfi_mrtc_array[0].phys_addr;
-
- /* vRTC's register address may not be page aligned */
- set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
-
- virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
- virt_base += rtc_paddr & ~PAGE_MASK;
- vrtc_virt_base = virt_base;
-
+ vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
+ vrtc_paddr);
x86_platform.get_wallclock = vrtc_get_time;
x86_platform.set_wallclock = vrtc_set_mmss;
}
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index e797428b163..c2a8cab65e5 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_OLPC) += olpc.o
obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o
+obj-$(CONFIG_OLPC) += olpc_ofw.o
+obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96..a7b38d35c29 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
- * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
+ * base_dest_nodeid is the nasid of the first uvhub
* in the partition. The bit map will indicate uvhub numbers,
* which are 0-N in a partition. Pnodes are unique system-wide.
*/
- bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
bd2->header.dest_subnodeid = 0x10; /* the LB */
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 7b24460917d..374a05d8ad2 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode, err;
@@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
else
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
mmr_value = 0;
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 63203767174..fe4cf829487 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -569,11 +569,13 @@ out_unlock:
static struct irqaction master_action = {
.handler = piix4_master_intr,
.name = "PIIX4-8259",
+ .flags = IRQF_NO_THREAD,
};
static struct irqaction cascade_action = {
.handler = no_action,
.name = "cascade",
+ .flags = IRQF_NO_THREAD,
};
static inline void set_piix4_virtual_irq_type(void)
@@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void)
chip = &cobalt_irq_type;
if (chip)
- set_irq_chip(i, chip);
+ irq_set_chip(i, chip);
}
setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc..e4343fe488e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+ bool "Enable Xen debug checks"
+ depends on XEN
+ default n
+ help
+ Enable various WARN_ON checks in the Xen MMU code.
+ Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..49dbd78ec3c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
xen_setup_features();
- pv_info = xen_info;
- pv_info.kernel_rpl = 0;
+ pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
return 0;
}
-void xen_hvm_init_shared_info(void)
+void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_have_vector_callback)
+ xen_init_lock_cpu(cpu);
break;
default:
break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
+ xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad57..3f6f3347aa1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1443,7 +1491,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
* early_ioremap fixmap slot, make sure it is RO.
*/
if (!is_early_ioremap_ptep(ptep) &&
- pfn >= e820_table_start && pfn < e820_table_end)
+ pfn >= pgt_buf_start && pfn < pgt_buf_end)
pte = pte_wrprotect(pte);
return pte;
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
static __init void xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff..215a3ce6106 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
* P2M_PER_PAGE depends on the architecture, as a mfn is always
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
* 512 and 1024 entries respectively.
+ *
+ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+ *
+ * However not all entries are filled with MFNs. Specifically for all other
+ * leaf entries, or for the top root, or middle one, for which there is a void
+ * entry, we assume it is "missing". So (for example)
+ * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ *
+ * We also have the possibility of setting 1-1 mappings on certain regions, so
+ * that:
+ * pfn_to_mfn(0xc0000)=0xc0000
+ *
+ * The benefit of this is, that we can assume for non-RAM regions (think
+ * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
+ * get the PFN value to match the MFN.
+ *
+ * For this to work efficiently we have one new page p2m_identity and
+ * allocate (via reserved_brk) any other pages we need to cover the sides
+ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+ * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
+ * no other fancy value).
+ *
+ * On lookup we spot that the entry points to p2m_identity and return the
+ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+ * If the entry points to an allocated page, we just proceed as before and
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * appropriate functions (pfn_to_mfn).
+ *
+ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+ * non-identity pfn. To protect ourselves against we elect to set (and get) the
+ * IDENTITY_FRAME_BIT on all identity mapped PFNs.
+ *
+ * This simplistic diagram is used to explain the more subtle piece of code.
+ * There is also a digram of the P2M at the end that can help.
+ * Imagine your E820 looking as so:
+ *
+ * 1GB 2GB
+ * /-------------------+---------\/----\ /----------\ /---+-----\
+ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
+ * \-------------------+---------/\----/ \----------/ \---+-----/
+ * ^- 1029MB ^- 2001MB
+ *
+ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+ * 2048MB = 524288 (0x80000)]
+ *
+ * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
+ * is actually not present (would have to kick the balloon driver to put it in).
+ *
+ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+ * of the PFN and the end PFN (263424 and 512256 respectively). The first step
+ * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
+ * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
+ * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
+ * to end pfn. We reserve_brk top leaf pages if they are missing (means they
+ * point to p2m_mid_missing).
+ *
+ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+ * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
+ * Each entry in the allocate page is "missing" (points to p2m_missing).
+ *
+ * Next stage is to determine if we need to do a more granular boundary check
+ * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
+ * We check if the start pfn and end pfn violate that boundary check, and if
+ * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * granularity of setting which PFNs are missing and which ones are identity.
+ * In our example 263424 and 512256 both fail the check so we reserve_brk two
+ * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
+ * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
+ *
+ * At this point we would at minimum reserve_brk one page, but could be up to
+ * three. Each call to set_phys_range_identity has at maximum a three page
+ * cost. If we were to query the P2M at this stage, all those entries from
+ * start PFN through end PFN (so 1029MB -> 2001MB) would return
+ * INVALID_P2M_ENTRY ("missing").
+ *
+ * The next step is to walk from the start pfn to the end pfn setting
+ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
+ * point we do not need to worry about boundary aligment (so no need to
+ * reserve_brk a middle page, figure out which PFNs are "missing" and which
+ * ones are identity), as that has been done earlier. If we find that the
+ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+ * that page (which covers 512 PFNs) and set the appropriate PFN with
+ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+ * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
+ * IDENTITY_FRAME_BIT set.
+ *
+ * All other regions that are void (or not filled) either point to p2m_missing
+ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
+ * contain the INVALID_P2M_ENTRY value and are considered "missing."
+ *
+ * This is what the p2m ends up looking (for the E820 above) with this
+ * fabulous drawing:
+ *
+ * p2m /--------------\
+ * /-----\ | &mfn_list[0],| /-----------------\
+ * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
+ * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+ * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
+ * |-----| \ | [p2m_identity]+\\ | .... |
+ * | 2 |--\ \-------------------->| ... | \\ \----------------/
+ * |-----| \ \---------------/ \\
+ * | 3 |\ \ \\ p2m_identity
+ * |-----| \ \-------------------->/---------------\ /-----------------\
+ * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ * \-----/ / | [p2m_identity]+-->| ..., ~0 |
+ * / /---------------\ | .... | \-----------------/
+ * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
+ * / | IDENTITY[@256]|<----/ \---------------/
+ * / | ~0, ~0, .... |
+ * | \---------------/
+ * |
+ * p2m_missing p2m_missing
+ * /------------------\ /------------\
+ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+ * | [p2m_mid_missing]+---->| ..., ~0 |
+ * \------------------/ \------------/
+ *
+ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/
#include <linux/init.h>
@@ -30,6 +153,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <asm/cache.h>
#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
* - After resume we're called from within stop_machine, but the mfn
* tree should alreay be completely allocated.
*/
-void xen_build_mfn_list_list(void)
+void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
+
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /*
+ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+ * would be wrong.
+ */
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return IDENTITY_FRAME(pfn);
+
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn;
}
- if (p2m_top[topidx][mididx] == p2m_missing) {
+ if (p2m_top[topidx][mididx] == p2m_identity ||
+ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+ unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m_init(p2m);
- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
+bool __early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx, idx;
+
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /* Pfff.. No boundary cross-over, lets get out. */
+ if (!idx)
+ return false;
+
+ WARN(p2m_top[topidx][mididx] == p2m_identity,
+ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+ topidx, mididx);
+
+ /*
+ * Could be done by xen_build_dynamic_phys_to_machine..
+ */
+ if (p2m_top[topidx][mididx] != p2m_missing)
+ return false;
+
+ /* Boundary cross-over for the edges: */
+ if (idx) {
+ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_init(p2m);
+
+ p2m_top[topidx][mididx] = p2m;
+
+ }
+ return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e)
+{
+ unsigned long pfn;
+
+ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ return 0;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+ return pfn_e - pfn_s;
+
+ if (pfn_s > pfn_e)
+ return 0;
+
+ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+ {
+ unsigned topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ }
+ }
+
+ __early_alloc_p2m(pfn_s);
+ __early_alloc_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+ break;
+
+ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+ (pfn_e - pfn_s) - (pfn - pfn_s)))
+ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+ return pfn - pfn_s;
+}
+
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return true;
+ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /* For sparse holes were the p2m leaf has real PFN along with
+ * PCI holes, stick in the PFN as the MFN value.
+ */
+ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return true;
+
+ /* Swap over from MISSING to IDENTITY if needed. */
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+ p2m_identity) != p2m_missing);
+ return true;
+ }
+ }
+
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
{
unsigned long flags;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+ static const char * const level_name[] = { "top", "middle",
+ "entry", "abnormal" };
+ static const char * const type_name[] = { "identity", "missing",
+ "pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+ unsigned int uninitialized_var(prev_level);
+ unsigned int uninitialized_var(prev_type);
+
+ if (!p2m_top)
+ return 0;
+
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned idx = p2m_index(pfn);
+ unsigned lvl, type;
+
+ lvl = 4;
+ type = TYPE_UNKNOWN;
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ lvl = 0; type = TYPE_MISSING;
+ } else if (p2m_top[topidx] == NULL) {
+ lvl = 0; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == NULL) {
+ lvl = 1; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+ lvl = 1; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+ lvl = 1; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == 0) {
+ lvl = 2; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+ lvl = 2; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+ lvl = 2; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+ lvl = 2; type = TYPE_PFN;
+ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+ lvl = 2; type = TYPE_PFN;
+ }
+ if (pfn == 0) {
+ prev_level = lvl;
+ prev_type = type;
+ }
+ if (pfn == MAX_DOMAIN_PAGES-1) {
+ lvl = 3;
+ type = TYPE_UNKNOWN;
+ }
+ if (prev_type != type) {
+ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+ prev_pfn_type, pfn, type_name[prev_type]);
+ prev_pfn_type = pfn;
+ prev_type = type;
+ }
+ if (prev_level != lvl) {
+ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+ prev_pfn_level, pfn, level_name[prev_level]);
+ prev_pfn_level = pfn;
+ prev_level = lvl;
+ }
+ }
+ return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d44..fa0269a9937 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
static __init void xen_add_extra_mem(unsigned long pages)
{
+ unsigned long pfn;
+
u64 size = (u64)pages * PAGE_SIZE;
u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
xen_extra_mem_size += size;
xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+ for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
start, end, ret);
if (ret == 1) {
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
return released;
}
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+ ssize_t map_size)
+{
+ phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+ phys_addr_t start_pci = last;
+ const struct e820entry *entry;
+ unsigned long identity = 0;
+ int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t start = entry->addr;
+ phys_addr_t end = start + entry->size;
+
+ if (start < last)
+ start = last;
+
+ if (end <= start)
+ continue;
+
+ /* Skip over the 1MB region. */
+ if (last > end)
+ continue;
+
+ if (entry->type == E820_RAM) {
+ if (start > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(start));
+
+ /* Without saving 'last' we would gooble RAM too
+ * at the end of the loop. */
+ last = end;
+ start_pci = end;
+ continue;
+ }
+ start_pci = min(start, start_pci);
+ last = end;
+ }
+ if (last > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(last));
+ return identity;
+}
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
+ static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long extra_pages = 0;
unsigned long extra_limit;
+ unsigned long identity_pages = 0;
int i;
int op;
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ memcpy(map_raw, map, sizeof(map));
e820.nr_map = 0;
xen_extra_mem_start = mem_end;
for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
end -= delta;
extra_pages += PFN_DOWN(delta);
+ /*
+ * Set RAM below 4GB that is not for us to be unusable.
+ * This prevents "System RAM" address space from being
+ * used as potential resource for I/O address (happens
+ * when 'allocate_resource' is called).
+ */
+ if (delta &&
+ (xen_initial_domain() && end < 0x100000000ULL))
+ e820_add_region(end, delta, E820_UNUSABLE);
}
if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(extra_pages);
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. We supply it with the non-sanitized version
+ * of the E820.
+ */
+ identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c795904..30612441ed9 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ WARN_ON(xen_smp_intr_init(0));
+
+ if (!xen_have_vector_callback)
+ return;
+ xen_init_lock_cpu(0);
+ xen_init_spinlocks();
+}
+
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+{
+ int rc;
+ rc = native_cpu_up(cpu);
+ WARN_ON (xen_smp_intr_init(cpu));
+ return rc;
+}
+
+static void xen_hvm_cpu_die(unsigned int cpu)
+{
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ native_cpu_die(cpu);
+}
+
+void __init xen_hvm_smp_init(void)
+{
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
+ smp_ops.cpu_up = xen_hvm_cpu_up;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
+ smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
+}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b..45329c8c226 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
#include "xen-ops.h"
#include "mmu.h"
-void xen_pre_suspend(void)
+void xen_arch_pre_suspend(void)
{
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
BUG();
}
-void xen_hvm_post_suspend(int suspend_cancelled)
+void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_setup_runstate_info(cpu);
}
}
+#endif
}
-void xen_post_suspend(int suspend_cancelled)
+void xen_arch_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a..2e2d370a47b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
+ IRQF_DISABLED|IRQF_PERCPU|
+ IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c..aaa7291c925 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
__FINIT
.pushsection .text
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE_asm
+ .skip PAGE_SIZE
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf98575..3112f55638c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_hvm_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f..249619e7e7f 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 19df764f639..f3e5eb43f71 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
update_process_times(user_mode(get_irq_regs()));
#endif
- write_seqlock(&xtime_lock);
-
- do_timer(1); /* Linux handler in kernel/timer.c */
+ xtime_update(1); /* Linux handler in kernel/time/timekeeping */
/* Note that writing CCOMPARE clears the interrupt. */
next += CCOUNT_PER_JIFFY;
set_linux_timer(next);
-
- write_sequnlock(&xtime_lock);
}
/* Allow platform to do something useful (Wdog). */
diff --git a/block/blk-lib.c b/block/blk-lib.c
index eec78becb35..bd3e8df4d5e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -109,7 +109,6 @@ struct bio_batch
atomic_t done;
unsigned long flags;
struct completion *wait;
- bio_end_io_t *end_io;
};
static void bio_batch_end_io(struct bio *bio, int err)
@@ -122,12 +121,9 @@ static void bio_batch_end_io(struct bio *bio, int err)
else
clear_bit(BIO_UPTODATE, &bb->flags);
}
- if (bb) {
- if (bb->end_io)
- bb->end_io(bio, err);
- atomic_inc(&bb->done);
- complete(bb->wait);
- }
+ if (bb)
+ if (atomic_dec_and_test(&bb->done))
+ complete(bb->wait);
bio_put(bio);
}
@@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int ret;
struct bio *bio;
struct bio_batch bb;
- unsigned int sz, issued = 0;
+ unsigned int sz;
DECLARE_COMPLETION_ONSTACK(wait);
- atomic_set(&bb.done, 0);
+ atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
- bb.end_io = NULL;
submit:
ret = 0;
@@ -185,12 +180,12 @@ submit:
break;
}
ret = 0;
- issued++;
+ atomic_inc(&bb.done);
submit_bio(WRITE, bio);
}
/* Wait for bios in-flight */
- while (issued != atomic_read(&bb.done))
+ if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 5eb25eb3ea4..3b5c3189fd9 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -274,7 +274,7 @@ acpi_table_parse_srat(enum acpi_srat_type id,
int __init acpi_numa_init(void)
{
- int ret = 0;
+ int cnt = 0;
/*
* Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
@@ -288,7 +288,7 @@ int __init acpi_numa_init(void)
acpi_parse_x2apic_affinity, 0);
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
acpi_parse_processor_affinity, 0);
- ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+ cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
}
@@ -297,7 +297,10 @@ int __init acpi_numa_init(void)
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
acpi_numa_arch_fixup();
- return ret;
+
+ if (cnt <= 0)
+ return cnt ?: -ENOENT;
+ return 0;
}
int acpi_get_pxm(acpi_handle h)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e349a..9cb8668ff5f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
+#define EMULATED_HD_DISK_MINOR_OFFSET (0)
+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
+#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
#define DEV_NAME "xvd" /* name in /dev */
@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
info->shadow[id].request = req;
ring_req->id = id;
- ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
+ ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
ring_req->handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
rq_data_dir(req) );
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
- ring_req->seg[i] =
+ ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
.first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
info->feature_flush ? "enabled" : "disabled");
}
+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
+{
+ int major;
+ major = BLKIF_MAJOR(vdevice);
+ *minor = BLKIF_MINOR(vdevice);
+ switch (major) {
+ case XEN_IDE0_MAJOR:
+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = ((*minor / 64) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_IDE1_MAJOR:
+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK0_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK1_MAJOR:
+ case XEN_SCSI_DISK2_MAJOR:
+ case XEN_SCSI_DISK3_MAJOR:
+ case XEN_SCSI_DISK4_MAJOR:
+ case XEN_SCSI_DISK5_MAJOR:
+ case XEN_SCSI_DISK6_MAJOR:
+ case XEN_SCSI_DISK7_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK8_MAJOR:
+ case XEN_SCSI_DISK9_MAJOR:
+ case XEN_SCSI_DISK10_MAJOR:
+ case XEN_SCSI_DISK11_MAJOR:
+ case XEN_SCSI_DISK12_MAJOR:
+ case XEN_SCSI_DISK13_MAJOR:
+ case XEN_SCSI_DISK14_MAJOR:
+ case XEN_SCSI_DISK15_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XENVBD_MAJOR:
+ *offset = *minor / PARTS_PER_DISK;
+ break;
+ default:
+ printk(KERN_WARNING "blkfront: your disk configuration is "
+ "incorrect, please use an xvd device instead\n");
+ return -ENODEV;
+ }
+ return 0;
+}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
{
struct gendisk *gd;
int nr_minors = 1;
- int err = -ENODEV;
+ int err;
unsigned int offset;
int minor;
int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
}
if (!VDEV_IS_EXTENDED(info->vdevice)) {
- minor = BLKIF_MINOR(info->vdevice);
- nr_parts = PARTS_PER_DISK;
+ err = xen_translate_vdev(info->vdevice, &minor, &offset);
+ if (err)
+ return err;
+ nr_parts = PARTS_PER_DISK;
} else {
minor = BLKIF_MINOR_EXT(info->vdevice);
nr_parts = PARTS_PER_EXT_DISK;
+ offset = minor / nr_parts;
+ if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
+ "emulated IDE disks,\n\t choose an xvd device name"
+ "from xvde on\n", info->vdevice);
}
+ err = -ENODEV;
if ((minor % nr_parts) == 0)
nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (gd == NULL)
goto release;
- offset = minor / nr_parts;
-
if (nr_minors > 1) {
if (offset < 26)
sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
{
int i;
for (i = 0; i < s->req.nr_segments; i++)
- gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+ gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Rewrite any grant references invalidated by susp/resume. */
for (j = 0; j < req->nr_segments; j++)
gnttab_grant_foreign_access_ref(
- req->seg[j].gref,
+ req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 7855f9f45b8..62787e30d50 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -900,6 +900,14 @@ static void sender(void *send_info,
printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
#endif
+ /*
+ * last_timeout_jiffies is updated here to avoid
+ * smi_timeout() handler passing very large time_diff
+ * value to smi_event_handler() that causes
+ * the send command to abort.
+ */
+ smi_info->last_timeout_jiffies = jiffies;
+
mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
if (smi_info->thread)
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d75627c6c..33dc2298af7 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -53,6 +53,8 @@ MODULE_LICENSE("GPL");
#define RTC_BITS 55 /* 55 bits for this implementation */
+static struct k_clock sgi_clock;
+
extern unsigned long sn_rtc_cycles_per_second;
#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC))
@@ -487,7 +489,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
return 0;
};
-static int sgi_clock_set(clockid_t clockid, struct timespec *tp)
+static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
{
u64 nsec;
@@ -763,15 +765,21 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
return err;
}
+static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
+{
+ tp->tv_sec = 0;
+ tp->tv_nsec = sgi_clock_period;
+ return 0;
+}
+
static struct k_clock sgi_clock = {
- .res = 0,
- .clock_set = sgi_clock_set,
- .clock_get = sgi_clock_get,
- .timer_create = sgi_timer_create,
- .nsleep = do_posix_clock_nonanosleep,
- .timer_set = sgi_timer_set,
- .timer_del = sgi_timer_del,
- .timer_get = sgi_timer_get
+ .clock_set = sgi_clock_set,
+ .clock_get = sgi_clock_get,
+ .clock_getres = sgi_clock_getres,
+ .timer_create = sgi_timer_create,
+ .timer_set = sgi_timer_set,
+ .timer_del = sgi_timer_del,
+ .timer_get = sgi_timer_get
};
/**
@@ -831,8 +839,8 @@ static int __init mmtimer_init(void)
(unsigned long) node);
}
- sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second;
- register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock);
+ sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second;
+ posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock);
printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION,
sn_rtc_cycles_per_second/(unsigned long)1E6);
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index cead8e6ff34..7f6f01a4b14 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = {
{ PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
{ 0, }
};
+MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id);
static struct pci_driver ioh_gpio_driver = {
.name = "ml_ioh_gpio",
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 0eba0a75c80..2c6af870510 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
{ 0, }
};
+MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);
static struct pci_driver pch_gpio_driver = {
.name = "pch_gpio",
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3e6f486f460..2abe240dae5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1553,7 +1553,17 @@
/* Backlight control */
#define BLC_PWM_CTL 0x61254
+#define BACKLIGHT_MODULATION_FREQ_SHIFT (17)
#define BLC_PWM_CTL2 0x61250 /* 965+ only */
+#define BLM_COMBINATION_MODE (1 << 30)
+/*
+ * This is the most significant 15 bits of the number of backlight cycles in a
+ * complete cycle of the modulated backlight control.
+ *
+ * The actual value is this field multiplied by two.
+ */
+#define BACKLIGHT_MODULATION_FREQ_MASK (0x7fff << 17)
+#define BLM_LEGACY_MODE (1 << 16)
/*
* This is the number of cycles out of the backlight modulation cycle for which
* the backlight is on.
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d860abeda70..f8f86e57df2 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -30,6 +30,8 @@
#include "intel_drv.h"
+#define PCI_LBPC 0xf4 /* legacy/combination backlight modes */
+
void
intel_fixed_panel_mode(struct drm_display_mode *fixed_mode,
struct drm_display_mode *adjusted_mode)
@@ -110,6 +112,19 @@ done:
dev_priv->pch_pf_size = (width << 16) | height;
}
+static int is_backlight_combination_mode(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ if (INTEL_INFO(dev)->gen >= 4)
+ return I915_READ(BLC_PWM_CTL2) & BLM_COMBINATION_MODE;
+
+ if (IS_GEN2(dev))
+ return I915_READ(BLC_PWM_CTL) & BLM_LEGACY_MODE;
+
+ return 0;
+}
+
static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv)
{
u32 val;
@@ -166,6 +181,9 @@ u32 intel_panel_get_max_backlight(struct drm_device *dev)
if (INTEL_INFO(dev)->gen < 4)
max &= ~1;
}
+
+ if (is_backlight_combination_mode(dev))
+ max *= 0xff;
}
DRM_DEBUG_DRIVER("max backlight PWM = %d\n", max);
@@ -183,6 +201,14 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
if (IS_PINEVIEW(dev))
val >>= 1;
+
+ if (is_backlight_combination_mode(dev)){
+ u8 lbpc;
+
+ val &= ~1;
+ pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
+ val *= lbpc;
+ }
}
DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val);
@@ -205,6 +231,16 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
if (HAS_PCH_SPLIT(dev))
return intel_pch_panel_set_backlight(dev, level);
+
+ if (is_backlight_combination_mode(dev)){
+ u32 max = intel_panel_get_max_backlight(dev);
+ u8 lbpc;
+
+ lbpc = level * 0xfe / max + 1;
+ level /= lbpc;
+ pci_write_config_byte(dev->pdev, PCI_LBPC, lbpc);
+ }
+
tmp = I915_READ(BLC_PWM_CTL);
if (IS_PINEVIEW(dev)) {
tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3ff896..6140ea1de45 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
}
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r700_vram_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
@@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev)
/* XXX: ontario has problems blitting to gart at the moment */
if (rdev->family == CHIP_PALM) {
rdev->asic->copy = NULL;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
}
/* allocate wb buffer */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2adfb03f479..2be698e78ff 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -623,7 +623,7 @@ done:
dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
return r;
}
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev)
{
int r;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
if (rdev->r600_blit.shader_obj == NULL)
return;
/* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 93fa735c8c1..e372f9e1e5c 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520);
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
- struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
- u32 tmp;
-
- /* make sure flip is at vb rather than hb */
- tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset);
- tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL;
- /* make sure pending bit is asserted */
- tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
- WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp);
-
- /* set pageflip to happen as late as possible in the vblank interval.
- * same field for crtc1/2
- */
- tmp = RREG32(RADEON_CRTC_GEN_CNTL);
- tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK;
- WREG32(RADEON_CRTC_GEN_CNTL, tmp);
-
/* enable the pflip int */
radeon_irq_kms_pflip_irq_get(rdev, crtc);
}
@@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
return r;
}
rdev->cp.ready = true;
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev)
void r100_cp_disable(struct radeon_device *rdev)
{
/* Disable ring */
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
rdev->cp.ready = false;
WREG32(RADEON_CP_CSQ_MODE, 0);
WREG32(RADEON_CP_CSQ_CNTL, 0);
@@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
/* FIXME we don't use the second aperture yet when we could use it */
if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
if (rdev->flags & RADEON_IS_IGP) {
uint32_t tom;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624d5f8..9b3fad23b76 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r600_vram_gtt_location(rdev, &rdev->mc);
if (rdev->flags & RADEON_IS_IGP) {
@@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
*/
void r600_cp_stop(struct radeon_device *rdev)
{
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
WREG32(SCRATCH_UMSK, 0);
}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 41f7aafc97c..df68d91e819 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -558,7 +558,7 @@ done:
dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
return r;
}
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev)
{
int r;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
if (rdev->r600_blit.shader_obj == NULL)
return;
/* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b67ef3..6b342949511 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -345,7 +345,6 @@ struct radeon_mc {
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
- u64 active_vram_size;
u64 gtt_size;
u64 gtt_start;
u64 gtt_end;
@@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern int radeon_resume_kms(struct drm_device *dev);
extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
+extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
extern bool r600_card_posted(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e75d63b8e21..793c5e6026a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = {
.pm_finish = &evergreen_pm_finish,
.pm_init_profile = &rs780_pm_init_profile,
.pm_get_dynpm_state = &r600_pm_get_dynpm_state,
+ .pre_page_flip = &evergreen_pre_page_flip,
+ .page_flip = &evergreen_page_flip,
+ .post_page_flip = &evergreen_post_page_flip,
};
static struct radeon_asic btc_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index df95eb83dac..1fe95dfe48c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
{
struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_info *args = data;
+ struct ttm_mem_type_manager *man;
+
+ man = &rdev->mman.bdev.man[TTM_PL_VRAM];
args->vram_size = rdev->mc.real_vram_size;
- args->vram_visible = rdev->mc.real_vram_size;
+ args->vram_visible = (u64)man->size << PAGE_SHIFT;
if (rdev->stollen_vga_memory)
args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
args->vram_visible -= radeon_fbdev_total_size(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cf0638c3b7c..78968b738e8 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
(target_fb->bits_per_pixel * 8));
crtc_pitch |= crtc_pitch << 16;
-
+ crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
if (tiling_flags & RADEON_TILING_MACRO) {
if (ASIC_IS_R300(rdev))
crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
@@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
gen_cntl_val = RREG32(gen_cntl_reg);
gen_cntl_val &= ~(0xf << 8);
gen_cntl_val |= (format << 8);
+ gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK;
WREG32(gen_cntl_reg, gen_cntl_val);
crtc_offset = (u32)base;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf10cbf..8389b4c63d1 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev)
DRM_INFO("radeon: ttm finalized\n");
}
+/* this should only be called at bootup or when userspace
+ * isn't running */
+void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
+{
+ struct ttm_mem_type_manager *man;
+
+ if (!rdev->mman.initialized)
+ return;
+
+ man = &rdev->mman.bdev.man[TTM_PL_VRAM];
+ /* this just adjusts TTM size idea, which sets lpfn to the correct value */
+ man->size = size >> PAGE_SHIFT;
+}
+
static struct vm_operations_struct radeon_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops = NULL;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5afe294ed51..8af4679db23 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev)
rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
base = RREG32_MC(R_000004_MC_FB_LOCATION);
base = G_000004_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 6638c8e4c81..66c949b7c18 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev)
rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
base = G_000100_MC_FB_START(base) << 16;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba6769065..714ad45757d 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev)
*/
void r700_cp_stop(struct radeon_device *rdev)
{
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
}
@@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r700_vram_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 3f49dd376f0..6e06019015a 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -37,7 +37,7 @@
#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */
#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */
#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */
-#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */
+#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */
#define SIO_REG_LDSEL 0x07 /* Logical device select */
#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */
@@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev)
int nr_fans = (data->type == f71882fg) ? 4 : 3;
u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
- platform_set_drvdata(pdev, NULL);
if (data->hwmon_dev)
hwmon_device_unregister(data->hwmon_dev);
@@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev)
}
}
+ platform_set_drvdata(pdev, NULL);
kfree(data);
return 0;
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 2e067dd2ee5..50ea1f43bdc 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -29,6 +29,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/ktime.h>
+#include <linux/slab.h>
#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */
#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index ef3bcb1ce86..1b46a9d9f90 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -249,7 +249,7 @@ static struct i2c_adapter ocores_adapter = {
static int ocores_i2c_of_probe(struct platform_device* pdev,
struct ocores_i2c* i2c)
{
- __be32* val;
+ const __be32* val;
val = of_get_property(pdev->dev.of_node, "regstep", NULL);
if (!val) {
@@ -330,9 +330,7 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
i2c->adap = ocores_adapter;
i2c_set_adapdata(&i2c->adap, i2c);
i2c->adap.dev.parent = &pdev->dev;
-#ifdef CONFIG_OF
i2c->adap.dev.of_node = pdev->dev.of_node;
-#endif
/* add i2c adapter to i2c tree */
ret = i2c_add_adapter(&i2c->adap);
@@ -390,15 +388,11 @@ static int ocores_i2c_resume(struct platform_device *pdev)
#define ocores_i2c_resume NULL
#endif
-#ifdef CONFIG_OF
static struct of_device_id ocores_i2c_match[] = {
- {
- .compatible = "opencores,i2c-ocores",
- },
- {},
+ { .compatible = "opencores,i2c-ocores", },
+ {},
};
MODULE_DEVICE_TABLE(of, ocores_i2c_match);
-#endif
/* work with hotplug and coldplug */
MODULE_ALIAS("platform:ocores-i2c");
@@ -411,9 +405,7 @@ static struct platform_driver ocores_i2c_driver = {
.driver = {
.owner = THIS_MODULE,
.name = "ocores-i2c",
-#ifdef CONFIG_OF
- .of_match_table = ocores_i2c_match,
-#endif
+ .of_match_table = ocores_i2c_match,
},
};
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 829a2a1029f..58a58c7eaa1 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -378,9 +378,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
* REVISIT: Some wkup sources might not be needed.
*/
dev->westate = OMAP_I2C_WE_ALL;
- if (dev->rev < OMAP_I2C_REV_ON_4430)
- omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
- dev->westate);
+ omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
}
}
omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index f0bd5bcdf56..045ba6efea4 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -537,9 +537,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
client->dev.parent = &client->adapter->dev;
client->dev.bus = &i2c_bus_type;
client->dev.type = &i2c_client_type;
-#ifdef CONFIG_OF
client->dev.of_node = info->of_node;
-#endif
dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
client->addr);
diff --git a/drivers/media/common/tuners/tda8290.c b/drivers/media/common/tuners/tda8290.c
index bc6a67768af..8c4852114ee 100644
--- a/drivers/media/common/tuners/tda8290.c
+++ b/drivers/media/common/tuners/tda8290.c
@@ -658,13 +658,13 @@ static int tda8290_probe(struct tuner_i2c_props *i2c_props)
#define TDA8290_ID 0x89
u8 reg = 0x1f, id;
struct i2c_msg msg_read[] = {
- { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg },
- { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id },
+ { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
+ { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
};
/* detect tda8290 */
if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
- printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n",
+ printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
__func__, reg);
return -ENODEV;
}
@@ -685,13 +685,13 @@ static int tda8295_probe(struct tuner_i2c_props *i2c_props)
#define TDA8295C2_ID 0x8b
u8 reg = 0x2f, id;
struct i2c_msg msg_read[] = {
- { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg },
- { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id },
+ { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
+ { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
};
- /* detect tda8290 */
+ /* detect tda8295 */
if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
- printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n",
+ printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
__func__, reg);
return -ENODEV;
}
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index defd83964ce..193cdb77b76 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -870,6 +870,23 @@ static int dib7070p_tuner_attach(struct dvb_usb_adapter *adap)
return 0;
}
+static int stk7700p_pid_filter(struct dvb_usb_adapter *adapter, int index,
+ u16 pid, int onoff)
+{
+ struct dib0700_state *st = adapter->dev->priv;
+ if (st->is_dib7000pc)
+ return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
+ return dib7000m_pid_filter(adapter->fe, index, pid, onoff);
+}
+
+static int stk7700p_pid_filter_ctrl(struct dvb_usb_adapter *adapter, int onoff)
+{
+ struct dib0700_state *st = adapter->dev->priv;
+ if (st->is_dib7000pc)
+ return dib7000p_pid_filter_ctrl(adapter->fe, onoff);
+ return dib7000m_pid_filter_ctrl(adapter->fe, onoff);
+}
+
static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff)
{
return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
@@ -1875,8 +1892,8 @@ struct dvb_usb_device_properties dib0700_devices[] = {
{
.caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
.pid_filter_count = 32,
- .pid_filter = stk70x0p_pid_filter,
- .pid_filter_ctrl = stk70x0p_pid_filter_ctrl,
+ .pid_filter = stk7700p_pid_filter,
+ .pid_filter_ctrl = stk7700p_pid_filter_ctrl,
.frontend_attach = stk7700p_frontend_attach,
.tuner_attach = stk7700p_tuner_attach,
diff --git a/drivers/media/dvb/dvb-usb/lmedm04.c b/drivers/media/dvb/dvb-usb/lmedm04.c
index 9eea4188303..46ccd01a769 100644
--- a/drivers/media/dvb/dvb-usb/lmedm04.c
+++ b/drivers/media/dvb/dvb-usb/lmedm04.c
@@ -659,7 +659,7 @@ static int lme2510_download_firmware(struct usb_device *dev,
}
/* Default firmware for LME2510C */
-const char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
+char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
static void lme_coldreset(struct usb_device *dev)
{
@@ -1006,7 +1006,7 @@ static struct dvb_usb_device_properties lme2510c_properties = {
.caps = DVB_USB_IS_AN_I2C_ADAPTER,
.usb_ctrl = DEVICE_SPECIFIC,
.download_firmware = lme2510_download_firmware,
- .firmware = lme_firmware,
+ .firmware = (const char *)&lme_firmware,
.size_of_priv = sizeof(struct lme2510_state),
.num_adapters = 1,
.adapter = {
@@ -1109,5 +1109,5 @@ module_exit(lme2510_module_exit);
MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>");
MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0");
-MODULE_VERSION("1.74");
+MODULE_VERSION("1.75");
MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb/frontends/dib7000m.c b/drivers/media/dvb/frontends/dib7000m.c
index c7f5ccf54aa..289a79837f2 100644
--- a/drivers/media/dvb/frontends/dib7000m.c
+++ b/drivers/media/dvb/frontends/dib7000m.c
@@ -1285,6 +1285,25 @@ struct i2c_adapter * dib7000m_get_i2c_master(struct dvb_frontend *demod, enum di
}
EXPORT_SYMBOL(dib7000m_get_i2c_master);
+int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff)
+{
+ struct dib7000m_state *state = fe->demodulator_priv;
+ u16 val = dib7000m_read_word(state, 294 + state->reg_offs) & 0xffef;
+ val |= (onoff & 0x1) << 4;
+ dprintk("PID filter enabled %d", onoff);
+ return dib7000m_write_word(state, 294 + state->reg_offs, val);
+}
+EXPORT_SYMBOL(dib7000m_pid_filter_ctrl);
+
+int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id, u16 pid, u8 onoff)
+{
+ struct dib7000m_state *state = fe->demodulator_priv;
+ dprintk("PID filter: index %x, PID %d, OnOff %d", id, pid, onoff);
+ return dib7000m_write_word(state, 300 + state->reg_offs + id,
+ onoff ? (1 << 13) | pid : 0);
+}
+EXPORT_SYMBOL(dib7000m_pid_filter);
+
#if 0
/* used with some prototype boards */
int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods,
diff --git a/drivers/media/dvb/frontends/dib7000m.h b/drivers/media/dvb/frontends/dib7000m.h
index 113819ce9f0..81fcf2241c6 100644
--- a/drivers/media/dvb/frontends/dib7000m.h
+++ b/drivers/media/dvb/frontends/dib7000m.h
@@ -46,6 +46,8 @@ extern struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *,
enum dibx000_i2c_interface,
int);
+extern int dib7000m_pid_filter(struct dvb_frontend *, u8 id, u16 pid, u8 onoff);
+extern int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff);
#else
static inline
struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
@@ -63,6 +65,19 @@ struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *demod,
printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
return NULL;
}
+static inline int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id,
+ u16 pid, u8 onoff)
+{
+ printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
+ return -ENODEV;
+}
+
+static inline int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe,
+ uint8_t onoff)
+{
+ printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
+ return -ENODEV;
+}
#endif
/* TODO
diff --git a/drivers/media/dvb/mantis/mantis_pci.c b/drivers/media/dvb/mantis/mantis_pci.c
index 59feeb84aec..10a432a79d0 100644
--- a/drivers/media/dvb/mantis/mantis_pci.c
+++ b/drivers/media/dvb/mantis/mantis_pci.c
@@ -22,7 +22,6 @@
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 73230ff93b8..01f258a2a57 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -112,7 +112,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
{
ktime_t now;
s64 delta; /* ns */
- struct ir_raw_event ev;
+ DEFINE_IR_RAW_EVENT(ev);
int rc = 0;
if (!dev->raw)
@@ -125,7 +125,6 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
* being called for the first time, note that delta can't
* possibly be negative.
*/
- ev.duration = 0;
if (delta > IR_MAX_DURATION || !dev->raw->last_type)
type |= IR_START_EVENT;
else
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 6df0a498064..e4f8eac7f71 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -148,6 +148,7 @@ enum mceusb_model_type {
MCE_GEN2_TX_INV,
POLARIS_EVK,
CX_HYBRID_TV,
+ MULTIFUNCTION,
};
struct mceusb_model {
@@ -155,9 +156,10 @@ struct mceusb_model {
u32 mce_gen2:1;
u32 mce_gen3:1;
u32 tx_mask_normal:1;
- u32 is_polaris:1;
u32 no_tx:1;
+ int ir_intfnum;
+
const char *rc_map; /* Allow specify a per-board map */
const char *name; /* per-board name */
};
@@ -179,7 +181,6 @@ static const struct mceusb_model mceusb_model[] = {
.tx_mask_normal = 1,
},
[POLARIS_EVK] = {
- .is_polaris = 1,
/*
* In fact, the EVK is shipped without
* remotes, but we should have something handy,
@@ -189,10 +190,13 @@ static const struct mceusb_model mceusb_model[] = {
.name = "Conexant Hybrid TV (cx231xx) MCE IR",
},
[CX_HYBRID_TV] = {
- .is_polaris = 1,
.no_tx = 1, /* tx isn't wired up at all */
.name = "Conexant Hybrid TV (cx231xx) MCE IR",
},
+ [MULTIFUNCTION] = {
+ .mce_gen2 = 1,
+ .ir_intfnum = 2,
+ },
};
static struct usb_device_id mceusb_dev_table[] = {
@@ -216,8 +220,9 @@ static struct usb_device_id mceusb_dev_table[] = {
{ USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
/* Philips/Spinel plus IR transceiver for ASUS */
{ USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
- /* Realtek MCE IR Receiver */
- { USB_DEVICE(VENDOR_REALTEK, 0x0161) },
+ /* Realtek MCE IR Receiver and card reader */
+ { USB_DEVICE(VENDOR_REALTEK, 0x0161),
+ .driver_info = MULTIFUNCTION },
/* SMK/Toshiba G83C0004D410 */
{ USB_DEVICE(VENDOR_SMK, 0x031d),
.driver_info = MCE_GEN2_TX_INV },
@@ -1101,7 +1106,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
bool is_gen3;
bool is_microsoft_gen1;
bool tx_mask_normal;
- bool is_polaris;
+ int ir_intfnum;
dev_dbg(&intf->dev, "%s called\n", __func__);
@@ -1110,13 +1115,11 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
is_gen3 = mceusb_model[model].mce_gen3;
is_microsoft_gen1 = mceusb_model[model].mce_gen1;
tx_mask_normal = mceusb_model[model].tx_mask_normal;
- is_polaris = mceusb_model[model].is_polaris;
+ ir_intfnum = mceusb_model[model].ir_intfnum;
- if (is_polaris) {
- /* Interface 0 is IR */
- if (idesc->desc.bInterfaceNumber)
- return -ENODEV;
- }
+ /* There are multi-function devices with non-IR interfaces */
+ if (idesc->desc.bInterfaceNumber != ir_intfnum)
+ return -ENODEV;
/* step through the endpoints to find first bulk in and out endpoint */
for (i = 0; i < idesc->desc.bNumEndpoints; ++i) {
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 273d9d67479..d4d64492a05 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -385,8 +385,9 @@ static void nvt_cir_regs_init(struct nvt_dev *nvt)
static void nvt_cir_wake_regs_init(struct nvt_dev *nvt)
{
- /* set number of bytes needed for wake key comparison (default 67) */
- nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_LEN, CIR_WAKE_FIFO_CMP_DEEP);
+ /* set number of bytes needed for wake from s3 (default 65) */
+ nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_CMP_BYTES,
+ CIR_WAKE_FIFO_CMP_DEEP);
/* set tolerance/variance allowed per byte during wake compare */
nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE,
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h
index 1df82351cb0..048135eea70 100644
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -305,8 +305,11 @@ struct nvt_dev {
#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20
#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10
-/* CIR Wake FIFO buffer is 67 bytes long */
-#define CIR_WAKE_FIFO_LEN 67
+/*
+ * The CIR Wake FIFO buffer is 67 bytes long, but the stock remote wakes
+ * the system comparing only 65 bytes (fails with this set to 67)
+ */
+#define CIR_WAKE_FIFO_CMP_BYTES 65
/* CIR Wake byte comparison tolerance */
#define CIR_WAKE_CMP_TOLERANCE 5
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 512a2f4ada0..5b4422ef4e6 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -850,7 +850,7 @@ static ssize_t store_protocols(struct device *device,
count++;
} else {
for (i = 0; i < ARRAY_SIZE(proto_names); i++) {
- if (!strncasecmp(tmp, proto_names[i].name, strlen(proto_names[i].name))) {
+ if (!strcasecmp(tmp, proto_names[i].name)) {
tmp += strlen(proto_names[i].name);
mask = proto_names[i].type;
break;
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index e41e4ad5cc4..9c475c600fc 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1758,7 +1758,12 @@ static int vidioc_reqbufs(struct file *file, void *priv,
if (rc < 0)
return rc;
- return videobuf_reqbufs(&fh->vb_vidq, rb);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_reqbufs(&fh->vb_vidq, rb);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_reqbufs(&fh->vb_vbiq, rb);
+
+ return rc;
}
static int vidioc_querybuf(struct file *file, void *priv,
@@ -1772,7 +1777,12 @@ static int vidioc_querybuf(struct file *file, void *priv,
if (rc < 0)
return rc;
- return videobuf_querybuf(&fh->vb_vidq, b);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_querybuf(&fh->vb_vidq, b);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_querybuf(&fh->vb_vbiq, b);
+
+ return rc;
}
static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1785,7 +1795,12 @@ static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
if (rc < 0)
return rc;
- return videobuf_qbuf(&fh->vb_vidq, b);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_qbuf(&fh->vb_vidq, b);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_qbuf(&fh->vb_vbiq, b);
+
+ return rc;
}
static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1806,7 +1821,12 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
dev->greenscreen_detected = 0;
}
- return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_dqbuf(&fh->vb_vbiq, b, file->f_flags & O_NONBLOCK);
+
+ return rc;
}
static struct v4l2_file_operations au0828_v4l_fops = {
diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 87177733cf9..68ad1963f42 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c
@@ -95,6 +95,53 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
.i2c = &cx18_i2c_std,
};
+static const struct cx18_card cx18_card_hvr1600_s5h1411 = {
+ .type = CX18_CARD_HVR_1600_S5H1411,
+ .name = "Hauppauge HVR-1600",
+ .comment = "Simultaneous Digital and Analog TV capture supported\n",
+ .v4l2_capabilities = CX18_CAP_ENCODER,
+ .hw_audio_ctrl = CX18_HW_418_AV,
+ .hw_muxer = CX18_HW_CS5345,
+ .hw_all = CX18_HW_TVEEPROM | CX18_HW_418_AV | CX18_HW_TUNER |
+ CX18_HW_CS5345 | CX18_HW_DVB | CX18_HW_GPIO_RESET_CTRL |
+ CX18_HW_Z8F0811_IR_HAUP,
+ .video_inputs = {
+ { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 },
+ { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 },
+ { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 },
+ { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 },
+ { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 },
+ },
+ .audio_inputs = {
+ { CX18_CARD_INPUT_AUD_TUNER,
+ CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 },
+ { CX18_CARD_INPUT_LINE_IN1,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_2 },
+ { CX18_CARD_INPUT_LINE_IN2,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_3 },
+ },
+ .radio_input = { CX18_CARD_INPUT_AUD_TUNER,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_4 },
+ .ddr = {
+ /* ESMT M13S128324A-5B memory */
+ .chip_config = 0x003,
+ .refresh = 0x30c,
+ .timing1 = 0x44220e82,
+ .timing2 = 0x08,
+ .tune_lane = 0,
+ .initial_emrs = 0,
+ },
+ .gpio_init.initial_value = 0x3001,
+ .gpio_init.direction = 0x3001,
+ .gpio_i2c_slave_reset = {
+ .active_lo_mask = 0x3001,
+ .msecs_asserted = 10,
+ .msecs_recovery = 40,
+ .ir_reset_mask = 0x0001,
+ },
+ .i2c = &cx18_i2c_std,
+};
+
static const struct cx18_card cx18_card_hvr1600_samsung = {
.type = CX18_CARD_HVR_1600_SAMSUNG,
.name = "Hauppauge HVR-1600 (Preproduction)",
@@ -523,7 +570,8 @@ static const struct cx18_card *cx18_card_list[] = {
&cx18_card_toshiba_qosmio_dvbt,
&cx18_card_leadtek_pvr2100,
&cx18_card_leadtek_dvr3100h,
- &cx18_card_gotview_dvd3
+ &cx18_card_gotview_dvd3,
+ &cx18_card_hvr1600_s5h1411
};
const struct cx18_card *cx18_get_card(u16 index)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 944af8adbe0..b1c3cbd9274 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -157,6 +157,7 @@ MODULE_PARM_DESC(cardtype,
"\t\t\t 7 = Leadtek WinFast PVR2100\n"
"\t\t\t 8 = Leadtek WinFast DVR3100 H\n"
"\t\t\t 9 = GoTView PCI DVD3 Hybrid\n"
+ "\t\t\t 10 = Hauppauge HVR 1600 (S5H1411)\n"
"\t\t\t 0 = Autodetect (default)\n"
"\t\t\t-1 = Ignore this card\n\t\t");
MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60");
@@ -337,6 +338,7 @@ void cx18_read_eeprom(struct cx18 *cx, struct tveeprom *tv)
switch (cx->card->type) {
case CX18_CARD_HVR_1600_ESMT:
case CX18_CARD_HVR_1600_SAMSUNG:
+ case CX18_CARD_HVR_1600_S5H1411:
tveeprom_hauppauge_analog(&c, tv, eedata);
break;
case CX18_CARD_YUAN_MPC718:
@@ -365,7 +367,25 @@ static void cx18_process_eeprom(struct cx18 *cx)
from the model number. Use the cardtype module option if you
have one of these preproduction models. */
switch (tv.model) {
- case 74000 ... 74999:
+ case 74301: /* Retail models */
+ case 74321:
+ case 74351: /* OEM models */
+ case 74361:
+ /* Digital side is s5h1411/tda18271 */
+ cx->card = cx18_get_card(CX18_CARD_HVR_1600_S5H1411);
+ break;
+ case 74021: /* Retail models */
+ case 74031:
+ case 74041:
+ case 74141:
+ case 74541: /* OEM models */
+ case 74551:
+ case 74591:
+ case 74651:
+ case 74691:
+ case 74751:
+ case 74891:
+ /* Digital side is s5h1409/mxl5005s */
cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
break;
case 0x718:
@@ -377,7 +397,8 @@ static void cx18_process_eeprom(struct cx18 *cx)
CX18_ERR("Invalid EEPROM\n");
return;
default:
- CX18_ERR("Unknown model %d, defaulting to HVR-1600\n", tv.model);
+ CX18_ERR("Unknown model %d, defaulting to original HVR-1600 "
+ "(cardtype=1)\n", tv.model);
cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
break;
}
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 306caac6d3f..f736679d251 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -85,7 +85,8 @@
#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */
#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */
#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */
-#define CX18_CARD_LAST 8
+#define CX18_CARD_HVR_1600_S5H1411 9 /* Hauppauge HVR 1600 s5h1411/tda18271*/
+#define CX18_CARD_LAST 9
#define CX18_ENC_STREAM_TYPE_MPG 0
#define CX18_ENC_STREAM_TYPE_TS 1
diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c
index f0381d62518..f41922bd402 100644
--- a/drivers/media/video/cx18/cx18-dvb.c
+++ b/drivers/media/video/cx18/cx18-dvb.c
@@ -29,6 +29,8 @@
#include "cx18-gpio.h"
#include "s5h1409.h"
#include "mxl5005s.h"
+#include "s5h1411.h"
+#include "tda18271.h"
#include "zl10353.h"
#include <linux/firmware.h>
@@ -77,6 +79,32 @@ static struct s5h1409_config hauppauge_hvr1600_config = {
};
/*
+ * CX18_CARD_HVR_1600_S5H1411
+ */
+static struct s5h1411_config hcw_s5h1411_config = {
+ .output_mode = S5H1411_SERIAL_OUTPUT,
+ .gpio = S5H1411_GPIO_OFF,
+ .vsb_if = S5H1411_IF_44000,
+ .qam_if = S5H1411_IF_4000,
+ .inversion = S5H1411_INVERSION_ON,
+ .status_mode = S5H1411_DEMODLOCKING,
+ .mpeg_timing = S5H1411_MPEGTIMING_CONTINOUS_NONINVERTING_CLOCK,
+};
+
+static struct tda18271_std_map hauppauge_tda18271_std_map = {
+ .atsc_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3,
+ .if_lvl = 6, .rfagc_top = 0x37 },
+ .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 0,
+ .if_lvl = 6, .rfagc_top = 0x37 },
+};
+
+static struct tda18271_config hauppauge_tda18271_config = {
+ .std_map = &hauppauge_tda18271_std_map,
+ .gate = TDA18271_GATE_DIGITAL,
+ .output_opt = TDA18271_OUTPUT_LT_OFF,
+};
+
+/*
* CX18_CARD_LEADTEK_DVR3100H
*/
/* Information/confirmation of proper config values provided by Terry Wu */
@@ -244,6 +272,7 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed)
switch (cx->card->type) {
case CX18_CARD_HVR_1600_ESMT:
case CX18_CARD_HVR_1600_SAMSUNG:
+ case CX18_CARD_HVR_1600_S5H1411:
v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL);
v |= 0x00400000; /* Serial Mode */
v |= 0x00002000; /* Data Length - Byte */
@@ -455,6 +484,15 @@ static int dvb_register(struct cx18_stream *stream)
ret = 0;
}
break;
+ case CX18_CARD_HVR_1600_S5H1411:
+ dvb->fe = dvb_attach(s5h1411_attach,
+ &hcw_s5h1411_config,
+ &cx->i2c_adap[0]);
+ if (dvb->fe != NULL)
+ dvb_attach(tda18271_attach, dvb->fe,
+ 0x60, &cx->i2c_adap[0],
+ &hauppauge_tda18271_config);
+ break;
case CX18_CARD_LEADTEK_DVR3100H:
dvb->fe = dvb_attach(zl10353_attach,
&leadtek_dvr3100h_demod,
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index ed3d8f55029..307ff543c25 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -122,10 +122,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
if (!i2c_wait_done(i2c_adap))
goto eio;
- if (!i2c_slave_did_ack(i2c_adap)) {
- retval = -ENXIO;
- goto err;
- }
if (i2c_debug) {
printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]);
if (!(ctrl & I2C_NOSTOP))
@@ -158,7 +154,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
eio:
retval = -EIO;
- err:
if (i2c_debug)
printk(KERN_ERR " ERR: %d\n", retval);
return retval;
@@ -209,10 +204,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
if (!i2c_wait_done(i2c_adap))
goto eio;
- if (cnt == 0 && !i2c_slave_did_ack(i2c_adap)) {
- retval = -ENXIO;
- goto err;
- }
msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff;
if (i2c_debug) {
dprintk(1, " %02x", msg->buf[cnt]);
@@ -224,7 +215,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
eio:
retval = -EIO;
- err:
if (i2c_debug)
printk(KERN_ERR " ERR: %d\n", retval);
return retval;
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 6fc09dd41b9..35796e03524 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -2015,7 +2015,8 @@ static int cx25840_probe(struct i2c_client *client,
kfree(state);
return err;
}
- v4l2_ctrl_cluster(2, &state->volume);
+ if (!is_cx2583x(state))
+ v4l2_ctrl_cluster(2, &state->volume);
v4l2_ctrl_handler_setup(&state->hdl);
if (client->dev.platform_data) {
diff --git a/drivers/media/video/ivtv/ivtv-irq.c b/drivers/media/video/ivtv/ivtv-irq.c
index 9b4faf00919..9c29e964d40 100644
--- a/drivers/media/video/ivtv/ivtv-irq.c
+++ b/drivers/media/video/ivtv/ivtv-irq.c
@@ -628,22 +628,66 @@ static void ivtv_irq_enc_pio_complete(struct ivtv *itv)
static void ivtv_irq_dma_err(struct ivtv *itv)
{
u32 data[CX2341X_MBOX_MAX_DATA];
+ u32 status;
del_timer(&itv->dma_timer);
+
ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data);
+ status = read_reg(IVTV_REG_DMASTATUS);
IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1],
- read_reg(IVTV_REG_DMASTATUS), itv->cur_dma_stream);
- write_reg(read_reg(IVTV_REG_DMASTATUS) & 3, IVTV_REG_DMASTATUS);
+ status, itv->cur_dma_stream);
+ /*
+ * We do *not* write back to the IVTV_REG_DMASTATUS register to
+ * clear the error status, if either the encoder write (0x02) or
+ * decoder read (0x01) bus master DMA operation do not indicate
+ * completed. We can race with the DMA engine, which may have
+ * transitioned to completed status *after* we read the register.
+ * Setting a IVTV_REG_DMASTATUS flag back to "busy" status, after the
+ * DMA engine has completed, will cause the DMA engine to stop working.
+ */
+ status &= 0x3;
+ if (status == 0x3)
+ write_reg(status, IVTV_REG_DMASTATUS);
+
if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) &&
itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) {
struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream];
- /* retry */
- if (s->type >= IVTV_DEC_STREAM_TYPE_MPG)
+ if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) {
+ /* retry */
+ /*
+ * FIXME - handle cases of DMA error similar to
+ * encoder below, except conditioned on status & 0x1
+ */
ivtv_dma_dec_start(s);
- else
- ivtv_dma_enc_start(s);
- return;
+ return;
+ } else {
+ if ((status & 0x2) == 0) {
+ /*
+ * CX2341x Bus Master DMA write is ongoing.
+ * Reset the timer and let it complete.
+ */
+ itv->dma_timer.expires =
+ jiffies + msecs_to_jiffies(600);
+ add_timer(&itv->dma_timer);
+ return;
+ }
+
+ if (itv->dma_retries < 3) {
+ /*
+ * CX2341x Bus Master DMA write has ended.
+ * Retry the write, starting with the first
+ * xfer segment. Just retrying the current
+ * segment is not sufficient.
+ */
+ s->sg_processed = 0;
+ itv->dma_retries++;
+ ivtv_dma_enc_start_xfer(s);
+ return;
+ }
+ /* Too many retries, give up on this one */
+ }
+
}
if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) {
ivtv_udma_start(itv);
diff --git a/drivers/media/video/mem2mem_testdev.c b/drivers/media/video/mem2mem_testdev.c
index c179041d91f..e7e717800ee 100644
--- a/drivers/media/video/mem2mem_testdev.c
+++ b/drivers/media/video/mem2mem_testdev.c
@@ -1011,7 +1011,6 @@ static int m2mtest_remove(struct platform_device *pdev)
v4l2_m2m_release(dev->m2m_dev);
del_timer_sync(&dev->timer);
video_unregister_device(dev->vfd);
- video_device_release(dev->vfd);
v4l2_device_unregister(&dev->v4l2_dev);
kfree(dev);
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b63f8cafa67..561909b65ce 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -57,7 +57,7 @@
#include <linux/usb.h>
#define S2255_MAJOR_VERSION 1
-#define S2255_MINOR_VERSION 20
+#define S2255_MINOR_VERSION 21
#define S2255_RELEASE 0
#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \
S2255_MINOR_VERSION, \
@@ -312,9 +312,9 @@ struct s2255_fh {
};
/* current cypress EEPROM firmware version */
-#define S2255_CUR_USB_FWVER ((3 << 8) | 6)
+#define S2255_CUR_USB_FWVER ((3 << 8) | 11)
/* current DSP FW version */
-#define S2255_CUR_DSP_FWVER 8
+#define S2255_CUR_DSP_FWVER 10102
/* Need DSP version 5+ for video status feature */
#define S2255_MIN_DSP_STATUS 5
#define S2255_MIN_DSP_COLORFILTER 8
@@ -492,9 +492,11 @@ static void planar422p_to_yuv_packed(const unsigned char *in,
static void s2255_reset_dsppower(struct s2255_dev *dev)
{
- s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b0b, NULL, 0, 1);
+ s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b01, NULL, 0, 1);
msleep(10);
s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1);
+ msleep(600);
+ s2255_vendor_req(dev, 0x10, 0x0000, 0x0000, NULL, 0, 1);
return;
}
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6625c057be0..150b5f3cd40 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1529,7 +1529,7 @@ void mmc_rescan(struct work_struct *work)
* still present
*/
if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
- && mmc_card_is_removable(host))
+ && !(host->caps & MMC_CAP_NONREMOVABLE))
host->bus_ops->detect(host);
/*
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index fd877f633dd..2f7fc0c5146 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1516,21 +1516,17 @@ static int __devexit mmc_spi_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_OF)
static struct of_device_id mmc_spi_of_match_table[] __devinitdata = {
{ .compatible = "mmc-spi-slot", },
{},
};
-#endif
static struct spi_driver mmc_spi_driver = {
.driver = {
.name = "mmc_spi",
.bus = &spi_bus_type,
.owner = THIS_MODULE,
-#if defined(CONFIG_OF)
.of_match_table = mmc_spi_of_match_table,
-#endif
},
.probe = mmc_spi_probe,
.remove = __devexit_p(mmc_spi_remove),
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index a8c3e1c9b02..4aaa88f8ab5 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation(
sleep_time = chip_op_time / 2;
for (;;) {
+ if (chip->state != chip_state) {
+ /* Someone's suspended the operation: sleep */
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&chip->wq, &wait);
+ mutex_unlock(&chip->mutex);
+ schedule();
+ remove_wait_queue(&chip->wq, &wait);
+ mutex_lock(&chip->mutex);
+ continue;
+ }
+
status = map_read(map, cmd_adr);
if (map_word_andequal(map, status, status_OK, status_OK))
break;
+ if (chip->erase_suspended && chip_state == FL_ERASING) {
+ /* Erase suspend occured while sleep: reset timeout */
+ timeo = reset_timeo;
+ chip->erase_suspended = 0;
+ }
+ if (chip->write_suspended && chip_state == FL_WRITING) {
+ /* Write suspend occured while sleep: reset timeout */
+ timeo = reset_timeo;
+ chip->write_suspended = 0;
+ }
if (!timeo) {
map_write(map, CMD(0x70), cmd_adr);
chip->state = FL_STATUS;
@@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation(
timeo--;
}
mutex_lock(&chip->mutex);
-
- while (chip->state != chip_state) {
- /* Someone's suspended the operation: sleep */
- DECLARE_WAITQUEUE(wait, current);
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&chip->wq, &wait);
- mutex_unlock(&chip->mutex);
- schedule();
- remove_wait_queue(&chip->wq, &wait);
- mutex_lock(&chip->mutex);
- }
- if (chip->erase_suspended && chip_state == FL_ERASING) {
- /* Erase suspend occured while sleep: reset timeout */
- timeo = reset_timeo;
- chip->erase_suspended = 0;
- }
- if (chip->write_suspended && chip_state == FL_WRITING) {
- /* Write suspend occured while sleep: reset timeout */
- timeo = reset_timeo;
- chip->write_suspended = 0;
- }
}
/* Done and happy. */
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index d72a5fb2d04..4e1be51cc12 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi)
}
-static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
+static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index)
{
int i,num_erase_regions;
uint8_t uaddr;
- if (! (jedec_table[index].devtypes & p_cfi->device_type)) {
+ if (!(jedec_table[index].devtypes & cfi->device_type)) {
DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n",
- jedec_table[index].name, 4 * (1<<p_cfi->device_type));
+ jedec_table[index].name, 4 * (1<<cfi->device_type));
return 0;
}
@@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
num_erase_regions = jedec_table[index].nr_regions;
- p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
- if (!p_cfi->cfiq) {
+ cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
+ if (!cfi->cfiq) {
//xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name);
return 0;
}
- memset(p_cfi->cfiq,0,sizeof(struct cfi_ident));
+ memset(cfi->cfiq, 0, sizeof(struct cfi_ident));
- p_cfi->cfiq->P_ID = jedec_table[index].cmd_set;
- p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
- p_cfi->cfiq->DevSize = jedec_table[index].dev_size;
- p_cfi->cfi_mode = CFI_MODE_JEDEC;
+ cfi->cfiq->P_ID = jedec_table[index].cmd_set;
+ cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
+ cfi->cfiq->DevSize = jedec_table[index].dev_size;
+ cfi->cfi_mode = CFI_MODE_JEDEC;
+ cfi->sector_erase_cmd = CMD(0x30);
for (i=0; i<num_erase_regions; i++){
- p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
+ cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
}
- p_cfi->cmdset_priv = NULL;
+ cfi->cmdset_priv = NULL;
/* This may be redundant for some cases, but it doesn't hurt */
- p_cfi->mfr = jedec_table[index].mfr_id;
- p_cfi->id = jedec_table[index].dev_id;
+ cfi->mfr = jedec_table[index].mfr_id;
+ cfi->id = jedec_table[index].dev_id;
uaddr = jedec_table[index].uaddr;
@@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
our brains explode when we see the datasheets talking about address
lines numbered from A-1 to A18. The CFI table has unlock addresses
in device-words according to the mode the device is connected in */
- p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type;
- p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type;
+ cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type;
+ cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type;
return 1; /* ok */
}
@@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base,
"MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n",
__func__, cfi->mfr, cfi->id,
cfi->addr_unlock1, cfi->addr_unlock2 );
- if (!cfi_jedec_setup(cfi, i))
+ if (!cfi_jedec_setup(map, cfi, i))
return 0;
goto ok_out;
}
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 77d64ce19e9..92de7e3a49a 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
printk(KERN_ERR MOD_NAME
" %s(): Unable to register resource %pR - kernel bug?\n",
__func__, &window->rsrc);
+ return -EBUSY;
}
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index cb20c67995d..e0a2373bf0e 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -413,7 +413,6 @@ error3:
error2:
list_del(&new->list);
error1:
- kfree(new);
return ret;
}
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 15682ec8530..28af71c6183 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void)
module_init(omap_nand_init);
module_exit(omap_nand_exit);
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards");
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index e78914938c5..ac08750748a 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = {
.remove = __devexit_p(generic_onenand_remove),
};
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
static int __init generic_onenand_init(void)
{
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index ac31f461cc1..c849cacf4b2 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void)
module_init(omap2_onenand_init);
module_exit(omap2_onenand_exit);
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 39214e51245..7ca0eded256 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data)
int csr0, boguscnt;
int handled = 0;
- if (dev == NULL) {
- printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n");
- return IRQ_NONE;
- }
-
lance->RAP = CSR0; /* PCnet-ISA Controller Status */
if (!(lance->RDP & INTR)) /* Check if any interrupt has been */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 7897d114b29..8849699c66c 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1211,6 +1211,7 @@ struct bnx2x {
/* DCBX Negotation results */
struct dcbx_features dcbx_local_feat;
u32 dcbx_error;
+ u32 pending_max;
};
/**
@@ -1616,8 +1617,8 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
/* CMNG constants, as derived from system spec calculations */
/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */
#define DEF_MIN_RATE 100
-/* resolution of the rate shaping timer - 100 usec */
-#define RS_PERIODIC_TIMEOUT_USEC 100
+/* resolution of the rate shaping timer - 400 usec */
+#define RS_PERIODIC_TIMEOUT_USEC 400
/* number of bytes in single QM arbitration cycle -
* coefficient for calculating the fairness timer */
#define QM_ARB_BYTES 160000
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 93798129061..a71b3294053 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -996,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp)
bnx2x_free_rx_skbs(bp);
}
+void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
+{
+ /* load old values */
+ u32 mf_cfg = bp->mf_config[BP_VN(bp)];
+
+ if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
+ /* leave all but MAX value */
+ mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
+
+ /* set new MAX value */
+ mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
+ & FUNC_MF_CFG_MAX_BW_MASK;
+
+ bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
+ }
+}
+
static void bnx2x_free_msix_irqs(struct bnx2x *bp)
{
int i, offset = 1;
@@ -1464,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
bnx2x_set_eth_mac(bp, 1);
+ if (bp->pending_max) {
+ bnx2x_update_max_mf_config(bp, bp->pending_max);
+ bp->pending_max = 0;
+ }
+
if (bp->port.pmf)
bnx2x_initial_phy_init(bp, load_mode);
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 326ba44b3de..85ea7f26b51 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp);
*/
int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state);
+/**
+ * Updates MAX part of MF configuration in HW
+ * (if required)
+ *
+ * @param bp
+ * @param value
+ */
+void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value);
+
/* dev_close main block */
int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index ef2919987a1..7e92f9d0dcf 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
speed |= (cmd->speed_hi << 16);
if (IS_MF_SI(bp)) {
- u32 param = 0, part;
+ u32 part;
u32 line_speed = bp->link_vars.line_speed;
/* use 10G if no link detected */
@@ -251,24 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
REQ_BC_VER_4_SET_MF_BW);
return -EINVAL;
}
+
part = (speed * 100) / line_speed;
+
if (line_speed < speed || !part) {
BNX2X_DEV_INFO("Speed setting should be in a range "
"from 1%% to 100%% "
"of actual line speed\n");
return -EINVAL;
}
- /* load old values */
- param = bp->mf_config[BP_VN(bp)];
- /* leave only MIN value */
- param &= FUNC_MF_CFG_MIN_BW_MASK;
-
- /* set new MAX value */
- param |= (part << FUNC_MF_CFG_MAX_BW_SHIFT)
- & FUNC_MF_CFG_MAX_BW_MASK;
+ if (bp->state != BNX2X_STATE_OPEN)
+ /* store value for following "load" */
+ bp->pending_max = part;
+ else
+ bnx2x_update_max_mf_config(bp, part);
- bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param);
return 0;
}
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 032ae184b60..aa032339e32 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -2092,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
bnx2x_calc_vn_weight_sum(bp);
/* calculate and set min-max rate for each vn */
- for (vn = VN_0; vn < E1HVN_MAX; vn++)
- bnx2x_init_vn_minmax(bp, vn);
+ if (bp->port.pmf)
+ for (vn = VN_0; vn < E1HVN_MAX; vn++)
+ bnx2x_init_vn_minmax(bp, vn);
/* always enable rate shaping and fairness */
bp->cmng.flags.cmng_enables |=
@@ -2162,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp)
bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
}
- /* indicate link status only if link status actually changed */
- if (prev_link_status != bp->link_vars.link_status)
- bnx2x_link_report(bp);
-
- if (IS_MF(bp))
- bnx2x_link_sync_notify(bp);
-
if (bp->link_vars.link_up && bp->link_vars.line_speed) {
int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
@@ -2180,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp)
DP(NETIF_MSG_IFUP,
"single function mode without fairness\n");
}
+
+ if (IS_MF(bp))
+ bnx2x_link_sync_notify(bp);
+
+ /* indicate link status only if link status actually changed */
+ if (prev_link_status != bp->link_vars.link_status)
+ bnx2x_link_report(bp);
}
void bnx2x__link_status_update(struct bnx2x *bp)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1024ae15822..a5d5d0b5b15 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port)
}
/**
- * __get_rx_machine_lock - lock the port's RX machine
+ * __get_state_machine_lock - lock the port's state machines
* @port: the port we're looking at
*
*/
-static inline void __get_rx_machine_lock(struct port *port)
+static inline void __get_state_machine_lock(struct port *port)
{
- spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
/**
- * __release_rx_machine_lock - unlock the port's RX machine
+ * __release_state_machine_lock - unlock the port's state machines
* @port: the port we're looking at
*
*/
-static inline void __release_rx_machine_lock(struct port *port)
+static inline void __release_state_machine_lock(struct port *port)
{
- spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
/**
@@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port)
}
/**
- * __initialize_port_locks - initialize a port's RX machine spinlock
+ * __initialize_port_locks - initialize a port's STATE machine spinlock
* @port: the port we're looking at
*
*/
static inline void __initialize_port_locks(struct port *port)
{
// make sure it isn't called twice
- spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
//conversions
@@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
{
rx_states_t last_state;
- // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
- __get_rx_machine_lock(port);
-
// keep current State Machine state to compare later if it was changed
last_state = port->sm_rx_state;
@@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
pr_err("%s: An illegal loopback occurred on adapter (%s).\n"
"Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n",
port->slave->dev->master->name, port->slave->dev->name);
- __release_rx_machine_lock(port);
return;
}
__update_selected(lacpdu, port);
@@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
break;
}
}
- __release_rx_machine_lock(port);
}
/**
@@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
goto re_arm;
}
+ /* Lock around state machines to protect data accessed
+ * by all (e.g., port->sm_vars). ad_rx_machine may run
+ * concurrently due to incoming LACPDU.
+ */
+ __get_state_machine_lock(port);
+
ad_rx_machine(NULL, port);
ad_periodic_machine(port);
ad_port_selection_logic(port);
@@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
// turn off the BEGIN bit, since we already handled it
if (port->sm_vars & AD_PORT_BEGIN)
port->sm_vars &= ~AD_PORT_BEGIN;
+
+ __release_state_machine_lock(port);
}
re_arm:
@@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
case AD_TYPE_LACPDU:
pr_debug("Received LACPDU on port %d\n",
port->actor_port_number);
+ /* Protect against concurrent state machines */
+ __get_state_machine_lock(port);
ad_rx_machine(lacpdu, port);
+ __release_state_machine_lock(port);
break;
case AD_TYPE_MARKER:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 2c46a154f2c..b28baff7086 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -264,7 +264,8 @@ struct ad_bond_info {
struct ad_slave_info {
struct aggregator aggregator; // 802.3ad aggregator structure
struct port port; // 802.3ad port structure
- spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt
+ spinlock_t state_machine_lock; /* mutex state machines vs.
+ incoming LACPDU */
u16 id;
};
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index b79d7e1555d..db0290f05bd 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -1163,15 +1163,11 @@ static int ethoc_resume(struct platform_device *pdev)
# define ethoc_resume NULL
#endif
-#ifdef CONFIG_OF
static struct of_device_id ethoc_match[] = {
- {
- .compatible = "opencores,ethoc",
- },
+ { .compatible = "opencores,ethoc", },
{},
};
MODULE_DEVICE_TABLE(of, ethoc_match);
-#endif
static struct platform_driver ethoc_driver = {
.probe = ethoc_probe,
@@ -1181,9 +1177,7 @@ static struct platform_driver ethoc_driver = {
.driver = {
.name = "ethoc",
.owner = THIS_MODULE,
-#ifdef CONFIG_OF
.of_match_table = ethoc_match,
-#endif
},
};
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5933621ac3f..fc27a9926d9 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
vnet_hdr_len = q->vnet_hdr_sz;
err = -EINVAL;
- if ((len -= vnet_hdr_len) < 0)
+ if (len < vnet_hdr_len)
goto err;
+ len -= vnet_hdr_len;
err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
sizeof(vnet_hdr));
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 27e6f6d43ca..e3ebd90ae65 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
#include <asm/processor.h>
#define DRV_NAME "r6040"
-#define DRV_VERSION "0.26"
-#define DRV_RELDATE "30May2010"
+#define DRV_VERSION "0.27"
+#define DRV_RELDATE "23Feb2011"
/* PHY CHIP Address */
#define PHY1_ADDR 1 /* For MAC1 */
@@ -69,6 +69,8 @@
/* MAC registers */
#define MCR0 0x00 /* Control register 0 */
+#define MCR0_PROMISC 0x0020 /* Promiscuous mode */
+#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */
#define MCR1 0x04 /* Control register 1 */
#define MAC_RST 0x0001 /* Reset the MAC */
#define MBCR 0x08 /* Bus control */
@@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev)
{
struct r6040_private *lp = netdev_priv(dev);
void __iomem *ioaddr = lp->base;
- u16 *adrp;
- u16 reg;
unsigned long flags;
struct netdev_hw_addr *ha;
int i;
+ u16 *adrp;
+ u16 hash_table[4] = { 0 };
+
+ spin_lock_irqsave(&lp->lock, flags);
- /* MAC Address */
+ /* Keep our MAC Address */
adrp = (u16 *)dev->dev_addr;
iowrite16(adrp[0], ioaddr + MID_0L);
iowrite16(adrp[1], ioaddr + MID_0M);
iowrite16(adrp[2], ioaddr + MID_0H);
- /* Promiscous Mode */
- spin_lock_irqsave(&lp->lock, flags);
-
/* Clear AMCP & PROM bits */
- reg = ioread16(ioaddr) & ~0x0120;
- if (dev->flags & IFF_PROMISC) {
- reg |= 0x0020;
- lp->mcr0 |= 0x0020;
- }
- /* Too many multicast addresses
- * accept all traffic */
- else if ((netdev_mc_count(dev) > MCAST_MAX) ||
- (dev->flags & IFF_ALLMULTI))
- reg |= 0x0020;
+ lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN);
- iowrite16(reg, ioaddr);
- spin_unlock_irqrestore(&lp->lock, flags);
+ /* Promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ lp->mcr0 |= MCR0_PROMISC;
- /* Build the hash table */
- if (netdev_mc_count(dev) > MCAST_MAX) {
- u16 hash_table[4];
- u32 crc;
+ /* Enable multicast hash table function to
+ * receive all multicast packets. */
+ else if (dev->flags & IFF_ALLMULTI) {
+ lp->mcr0 |= MCR0_HASH_EN;
- for (i = 0; i < 4; i++)
- hash_table[i] = 0;
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
+ for (i = 0; i < 4; i++)
+ hash_table[i] = 0xffff;
+ }
+ /* Use internal multicast address registers if the number of
+ * multicast addresses is not greater than MCAST_MAX. */
+ else if (netdev_mc_count(dev) <= MCAST_MAX) {
+ i = 0;
netdev_for_each_mc_addr(ha, dev) {
- char *addrs = ha->addr;
+ u16 *adrp = (u16 *) ha->addr;
+ iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
+ iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
+ iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
+ i++;
+ }
+ while (i < MCAST_MAX) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ i++;
+ }
+ }
+ /* Otherwise, Enable multicast hash table function. */
+ else {
+ u32 crc;
- if (!(*addrs & 1))
- continue;
+ lp->mcr0 |= MCR0_HASH_EN;
+
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
- crc = ether_crc_le(6, addrs);
+ /* Build multicast hash table */
+ netdev_for_each_mc_addr(ha, dev) {
+ u8 *addrs = ha->addr;
+
+ crc = ether_crc(ETH_ALEN, addrs);
crc >>= 26;
- hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
+ hash_table[crc >> 4] |= 1 << (crc & 0xf);
}
- /* Fill the MAC hash tables with their values */
+ }
+
+ iowrite16(lp->mcr0, ioaddr + MCR0);
+
+ /* Fill the MAC hash tables with their values */
+ if (lp->mcr0 && MCR0_HASH_EN) {
iowrite16(hash_table[0], ioaddr + MAR0);
iowrite16(hash_table[1], ioaddr + MAR1);
iowrite16(hash_table[2], ioaddr + MAR2);
iowrite16(hash_table[3], ioaddr + MAR3);
}
- /* Multicast Address 1~4 case */
- i = 0;
- netdev_for_each_mc_addr(ha, dev) {
- if (i >= MCAST_MAX)
- break;
- adrp = (u16 *) ha->addr;
- iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
- iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
- iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
- i++;
- }
- while (i < MCAST_MAX) {
- iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
- i++;
- }
+
+ spin_unlock_irqrestore(&lp->lock, flags);
}
static void netdev_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 64bfdae5956..d70bde95460 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev)
smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
+ /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */
+ spin_lock_irq(&pdata->mac_lock);
+ smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q);
+ spin_unlock_irq(&pdata->mac_lock);
+
/* Make sure EEPROM has finished loading before setting GPIO_CFG */
timeout = 50;
while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) &&
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3c6e100a3ad..d06a6374ed6 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -69,4 +69,10 @@ config OF_MDIO
help
OpenFirmware MDIO bus (Ethernet PHY) accessors
+config OF_PCI
+ def_tristate PCI
+ depends on PCI && (PPC || MICROBLAZE || X86)
+ help
+ OpenFirmware PCI bus accessors
+
endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 3ab21a0a490..f7861ed2f28 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_OF_I2C) += of_i2c.o
obj-$(CONFIG_OF_NET) += of_net.o
obj-$(CONFIG_OF_SPI) += of_spi.o
obj-$(CONFIG_OF_MDIO) += of_mdio.o
+obj-$(CONFIG_OF_PCI) += of_pci.o
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
new file mode 100644
index 00000000000..ac1ec54e4fd
--- /dev/null
+++ b/drivers/of/of_pci.c
@@ -0,0 +1,92 @@
+#include <linux/kernel.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+#include <asm/prom.h>
+
+/**
+ * of_irq_map_pci - Resolve the interrupt for a PCI device
+ * @pdev: the device whose interrupt is to be resolved
+ * @out_irq: structure of_irq filled by this function
+ *
+ * This function resolves the PCI interrupt for a given PCI device. If a
+ * device-node exists for a given pci_dev, it will use normal OF tree
+ * walking. If not, it will implement standard swizzling and walk up the
+ * PCI tree until an device-node is found, at which point it will finish
+ * resolving using the OF tree walking.
+ */
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
+{
+ struct device_node *dn, *ppnode;
+ struct pci_dev *ppdev;
+ u32 lspec;
+ __be32 lspec_be;
+ __be32 laddr[3];
+ u8 pin;
+ int rc;
+
+ /* Check if we have a device node, if yes, fallback to standard
+ * device tree parsing
+ */
+ dn = pci_device_to_OF_node(pdev);
+ if (dn) {
+ rc = of_irq_map_one(dn, 0, out_irq);
+ if (!rc)
+ return rc;
+ }
+
+ /* Ok, we don't, time to have fun. Let's start by building up an
+ * interrupt spec. we assume #interrupt-cells is 1, which is standard
+ * for PCI. If you do different, then don't use that routine.
+ */
+ rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+ if (rc != 0)
+ return rc;
+ /* No pin, exit */
+ if (pin == 0)
+ return -ENODEV;
+
+ /* Now we walk up the PCI tree */
+ lspec = pin;
+ for (;;) {
+ /* Get the pci_dev of our parent */
+ ppdev = pdev->bus->self;
+
+ /* Ouch, it's a host bridge... */
+ if (ppdev == NULL) {
+ ppnode = pci_bus_to_OF_node(pdev->bus);
+
+ /* No node for host bridge ? give up */
+ if (ppnode == NULL)
+ return -EINVAL;
+ } else {
+ /* We found a P2P bridge, check if it has a node */
+ ppnode = pci_device_to_OF_node(ppdev);
+ }
+
+ /* Ok, we have found a parent with a device-node, hand over to
+ * the OF parsing code.
+ * We build a unit address from the linux device to be used for
+ * resolution. Note that we use the linux bus number which may
+ * not match your firmware bus numbering.
+ * Fortunately, in most cases, interrupt-map-mask doesn't
+ * include the bus number as part of the matching.
+ * You should still be careful about that though if you intend
+ * to rely on this function (you ship a firmware that doesn't
+ * create device nodes for all PCI devices).
+ */
+ if (ppnode)
+ break;
+
+ /* We can only get here if we hit a P2P bridge with no node,
+ * let's do standard swizzling and try again
+ */
+ lspec = pci_swizzle_interrupt_pin(pdev, lspec);
+ pdev = ppdev;
+ }
+
+ lspec_be = cpu_to_be32(lspec);
+ laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
+ laddr[1] = laddr[2] = cpu_to_be32(0);
+ return of_irq_map_raw(ppnode, &lspec_be, 1, laddr, out_irq);
+}
+EXPORT_SYMBOL_GPL(of_irq_map_pci);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fcc0ea..492b7d807fe 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
#ifdef CONFIG_PCI_MSI
static int pci_frontend_enable_msix(struct pci_dev *dev,
- int **vector, int nvec)
+ int vector[], int nvec)
{
int err;
int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
if (likely(!err)) {
if (likely(!op.value)) {
/* we get the result */
- for (i = 0; i < nvec; i++)
- *(*vector+i) = op.msix_entries[i].vector;
- return 0;
+ for (i = 0; i < nvec; i++) {
+ if (op.msix_entries[i].vector <= 0) {
+ dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
+ i, op.msix_entries[i].vector);
+ err = -EINVAL;
+ vector[i] = -1;
+ continue;
+ }
+ vector[i] = op.msix_entries[i].vector;
+ }
} else {
printk(KERN_DEBUG "enable msix get value %x\n",
op.value);
- return op.value;
}
} else {
dev_err(&dev->dev, "enable msix get err %x\n", err);
- return err;
}
+ return err;
}
static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
}
-static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
{
int err;
struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
err = do_pci_op(pdev, &op);
if (likely(!err)) {
- *(*vector) = op.value;
+ vector[0] = op.value;
+ if (op.value <= 0) {
+ dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
+ op.value);
+ err = -EINVAL;
+ vector[0] = -1;
+ }
} else {
dev_err(&dev->dev, "pci frontend enable msi failed for dev "
"%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
pcifront_free_roots(pdev);
- /*For PCIE_AER error handling job*/
- flush_scheduled_work();
+ cancel_work_sync(&pdev->op_work);
if (pdev->irq >= 0)
unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index c404b61386b..09b4437b3e6 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -117,6 +117,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
struct module *owner)
{
struct rtc_device *rtc;
+ struct rtc_wkalrm alrm;
int id, err;
if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
@@ -166,6 +167,12 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
rtc->pie_timer.function = rtc_pie_update_irq;
rtc->pie_enabled = 0;
+ /* Check to see if there is an ALARM already set in hw */
+ err = __rtc_read_alarm(rtc, &alrm);
+
+ if (!err && !rtc_valid_tm(&alrm.time))
+ rtc_set_alarm(rtc, &alrm);
+
strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
dev_set_name(&rtc->dev, "rtc%d", id);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index cb2f0728fd7..8ec6b069a7f 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -116,6 +116,186 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
}
EXPORT_SYMBOL_GPL(rtc_set_mmss);
+static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+ int err;
+
+ err = mutex_lock_interruptible(&rtc->ops_lock);
+ if (err)
+ return err;
+
+ if (rtc->ops == NULL)
+ err = -ENODEV;
+ else if (!rtc->ops->read_alarm)
+ err = -EINVAL;
+ else {
+ memset(alarm, 0, sizeof(struct rtc_wkalrm));
+ err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
+ }
+
+ mutex_unlock(&rtc->ops_lock);
+ return err;
+}
+
+int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+ int err;
+ struct rtc_time before, now;
+ int first_time = 1;
+ unsigned long t_now, t_alm;
+ enum { none, day, month, year } missing = none;
+ unsigned days;
+
+ /* The lower level RTC driver may return -1 in some fields,
+ * creating invalid alarm->time values, for reasons like:
+ *
+ * - The hardware may not be capable of filling them in;
+ * many alarms match only on time-of-day fields, not
+ * day/month/year calendar data.
+ *
+ * - Some hardware uses illegal values as "wildcard" match
+ * values, which non-Linux firmware (like a BIOS) may try
+ * to set up as e.g. "alarm 15 minutes after each hour".
+ * Linux uses only oneshot alarms.
+ *
+ * When we see that here, we deal with it by using values from
+ * a current RTC timestamp for any missing (-1) values. The
+ * RTC driver prevents "periodic alarm" modes.
+ *
+ * But this can be racey, because some fields of the RTC timestamp
+ * may have wrapped in the interval since we read the RTC alarm,
+ * which would lead to us inserting inconsistent values in place
+ * of the -1 fields.
+ *
+ * Reading the alarm and timestamp in the reverse sequence
+ * would have the same race condition, and not solve the issue.
+ *
+ * So, we must first read the RTC timestamp,
+ * then read the RTC alarm value,
+ * and then read a second RTC timestamp.
+ *
+ * If any fields of the second timestamp have changed
+ * when compared with the first timestamp, then we know
+ * our timestamp may be inconsistent with that used by
+ * the low-level rtc_read_alarm_internal() function.
+ *
+ * So, when the two timestamps disagree, we just loop and do
+ * the process again to get a fully consistent set of values.
+ *
+ * This could all instead be done in the lower level driver,
+ * but since more than one lower level RTC implementation needs it,
+ * then it's probably best best to do it here instead of there..
+ */
+
+ /* Get the "before" timestamp */
+ err = rtc_read_time(rtc, &before);
+ if (err < 0)
+ return err;
+ do {
+ if (!first_time)
+ memcpy(&before, &now, sizeof(struct rtc_time));
+ first_time = 0;
+
+ /* get the RTC alarm values, which may be incomplete */
+ err = rtc_read_alarm_internal(rtc, alarm);
+ if (err)
+ return err;
+
+ /* full-function RTCs won't have such missing fields */
+ if (rtc_valid_tm(&alarm->time) == 0)
+ return 0;
+
+ /* get the "after" timestamp, to detect wrapped fields */
+ err = rtc_read_time(rtc, &now);
+ if (err < 0)
+ return err;
+
+ /* note that tm_sec is a "don't care" value here: */
+ } while ( before.tm_min != now.tm_min
+ || before.tm_hour != now.tm_hour
+ || before.tm_mon != now.tm_mon
+ || before.tm_year != now.tm_year);
+
+ /* Fill in the missing alarm fields using the timestamp; we
+ * know there's at least one since alarm->time is invalid.
+ */
+ if (alarm->time.tm_sec == -1)
+ alarm->time.tm_sec = now.tm_sec;
+ if (alarm->time.tm_min == -1)
+ alarm->time.tm_min = now.tm_min;
+ if (alarm->time.tm_hour == -1)
+ alarm->time.tm_hour = now.tm_hour;
+
+ /* For simplicity, only support date rollover for now */
+ if (alarm->time.tm_mday == -1) {
+ alarm->time.tm_mday = now.tm_mday;
+ missing = day;
+ }
+ if (alarm->time.tm_mon == -1) {
+ alarm->time.tm_mon = now.tm_mon;
+ if (missing == none)
+ missing = month;
+ }
+ if (alarm->time.tm_year == -1) {
+ alarm->time.tm_year = now.tm_year;
+ if (missing == none)
+ missing = year;
+ }
+
+ /* with luck, no rollover is needed */
+ rtc_tm_to_time(&now, &t_now);
+ rtc_tm_to_time(&alarm->time, &t_alm);
+ if (t_now < t_alm)
+ goto done;
+
+ switch (missing) {
+
+ /* 24 hour rollover ... if it's now 10am Monday, an alarm that
+ * that will trigger at 5am will do so at 5am Tuesday, which
+ * could also be in the next month or year. This is a common
+ * case, especially for PCs.
+ */
+ case day:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
+ t_alm += 24 * 60 * 60;
+ rtc_time_to_tm(t_alm, &alarm->time);
+ break;
+
+ /* Month rollover ... if it's the 31th, an alarm on the 3rd will
+ * be next month. An alarm matching on the 30th, 29th, or 28th
+ * may end up in the month after that! Many newer PCs support
+ * this type of alarm.
+ */
+ case month:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
+ do {
+ if (alarm->time.tm_mon < 11)
+ alarm->time.tm_mon++;
+ else {
+ alarm->time.tm_mon = 0;
+ alarm->time.tm_year++;
+ }
+ days = rtc_month_days(alarm->time.tm_mon,
+ alarm->time.tm_year);
+ } while (days < alarm->time.tm_mday);
+ break;
+
+ /* Year rollover ... easy except for leap years! */
+ case year:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
+ do {
+ alarm->time.tm_year++;
+ } while (rtc_valid_tm(&alarm->time) != 0);
+ break;
+
+ default:
+ dev_warn(&rtc->dev, "alarm rollover not handled\n");
+ }
+
+done:
+ return 0;
+}
+
int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
int err;
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 26d1cf5d19a..518a76ec71c 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -183,33 +183,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return 0;
}
-/*
- * Handle commands from user-space
- */
-static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- int ret = 0;
-
- pr_debug("%s(): cmd=%08x, arg=%08lx.\n", __func__, cmd, arg);
-
- /* important: scrub old status before enabling IRQs */
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- at91_sys_write(AT91_RTC_IDR, AT91_RTC_SECEV);
- break;
- case RTC_UIE_ON: /* update on */
- at91_sys_write(AT91_RTC_SCCR, AT91_RTC_SECEV);
- at91_sys_write(AT91_RTC_IER, AT91_RTC_SECEV);
- break;
- default:
- ret = -ENOIOCTLCMD;
- break;
- }
-
- return ret;
-}
-
static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
pr_debug("%s(): cmd=%08x\n", __func__, enabled);
@@ -269,7 +242,6 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
}
static const struct rtc_class_ops at91_rtc_ops = {
- .ioctl = at91_rtc_ioctl,
.read_time = at91_rtc_readtime,
.set_time = at91_rtc_settime,
.read_alarm = at91_rtc_readalarm,
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 5469c52cba3..a3ad957507d 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -216,33 +216,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return 0;
}
-/*
- * Handle commands from user-space
- */
-static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- struct sam9_rtc *rtc = dev_get_drvdata(dev);
- int ret = 0;
- u32 mr = rtt_readl(rtc, MR);
-
- dev_dbg(dev, "ioctl: cmd=%08x, arg=%08lx, mr %08x\n", cmd, arg, mr);
-
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN);
- break;
- case RTC_UIE_ON: /* update on */
- rtt_writel(rtc, MR, mr | AT91_RTT_RTTINCIEN);
- break;
- default:
- ret = -ENOIOCTLCMD;
- break;
- }
-
- return ret;
-}
-
static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct sam9_rtc *rtc = dev_get_drvdata(dev);
@@ -303,7 +276,6 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc)
}
static const struct rtc_class_ops at91_rtc_ops = {
- .ioctl = at91_rtc_ioctl,
.read_time = at91_rtc_readtime,
.set_time = at91_rtc_settime,
.read_alarm = at91_rtc_readalarm,
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 17971d93354..ca9cff85ab8 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -240,32 +240,6 @@ static void bfin_rtc_int_set_alarm(struct bfin_rtc *rtc)
*/
bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY);
}
-static int bfin_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct bfin_rtc *rtc = dev_get_drvdata(dev);
- int ret = 0;
-
- dev_dbg_stamp(dev);
-
- bfin_rtc_sync_pending(dev);
-
- switch (cmd) {
- case RTC_UIE_ON:
- dev_dbg_stamp(dev);
- bfin_rtc_int_set(RTC_ISTAT_SEC);
- break;
- case RTC_UIE_OFF:
- dev_dbg_stamp(dev);
- bfin_rtc_int_clear(~RTC_ISTAT_SEC);
- break;
-
- default:
- dev_dbg_stamp(dev);
- ret = -ENOIOCTLCMD;
- }
-
- return ret;
-}
static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -358,7 +332,6 @@ static int bfin_rtc_proc(struct device *dev, struct seq_file *seq)
}
static struct rtc_class_ops bfin_rtc_ops = {
- .ioctl = bfin_rtc_ioctl,
.read_time = bfin_rtc_read_time,
.set_time = bfin_rtc_set_time,
.read_alarm = bfin_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index c7ff8df347e..911e75cdc12 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -37,6 +37,8 @@
#include <linux/mod_devicetable.h>
#include <linux/log2.h>
#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
#include <asm-generic/rtc.h>
@@ -375,50 +377,6 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
-static int cmos_irq_set_freq(struct device *dev, int freq)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- int f;
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -ENXIO;
-
- if (!is_power_of_2(freq))
- return -EINVAL;
- /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
- f = ffs(freq);
- if (f-- > 16)
- return -EINVAL;
- f = 16 - f;
-
- spin_lock_irqsave(&rtc_lock, flags);
- hpet_set_periodic_freq(freq);
- CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return 0;
-}
-
-static int cmos_irq_set_state(struct device *dev, int enabled)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -ENXIO;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- cmos_irq_enable(cmos, RTC_PIE);
- else
- cmos_irq_disable(cmos, RTC_PIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
@@ -438,25 +396,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int cmos_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -EINVAL;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- cmos_irq_enable(cmos, RTC_UIE);
- else
- cmos_irq_disable(cmos, RTC_UIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
static int cmos_procfs(struct device *dev, struct seq_file *seq)
@@ -501,10 +440,7 @@ static const struct rtc_class_ops cmos_rtc_ops = {
.read_alarm = cmos_read_alarm,
.set_alarm = cmos_set_alarm,
.proc = cmos_procfs,
- .irq_set_freq = cmos_irq_set_freq,
- .irq_set_state = cmos_irq_set_state,
.alarm_irq_enable = cmos_alarm_irq_enable,
- .update_irq_enable = cmos_update_irq_enable,
};
/*----------------------------------------------------------------*/
@@ -1123,6 +1059,47 @@ static struct pnp_driver cmos_pnp_driver = {
#endif /* CONFIG_PNP */
+#ifdef CONFIG_OF
+static const struct of_device_id of_cmos_match[] = {
+ {
+ .compatible = "motorola,mc146818",
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(of, of_cmos_match);
+
+static __init void cmos_of_init(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct rtc_time time;
+ int ret;
+ const __be32 *val;
+
+ if (!node)
+ return;
+
+ val = of_get_property(node, "ctrl-reg", NULL);
+ if (val)
+ CMOS_WRITE(be32_to_cpup(val), RTC_CONTROL);
+
+ val = of_get_property(node, "freq-reg", NULL);
+ if (val)
+ CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
+
+ get_rtc_time(&time);
+ ret = rtc_valid_tm(&time);
+ if (ret) {
+ struct rtc_time def_time = {
+ .tm_year = 1,
+ .tm_mday = 1,
+ };
+ set_rtc_time(&def_time);
+ }
+}
+#else
+static inline void cmos_of_init(struct platform_device *pdev) {}
+#define of_cmos_match NULL
+#endif
/*----------------------------------------------------------------*/
/* Platform setup should have set up an RTC device, when PNP is
@@ -1131,6 +1108,7 @@ static struct pnp_driver cmos_pnp_driver = {
static int __init cmos_platform_probe(struct platform_device *pdev)
{
+ cmos_of_init(pdev);
cmos_wake_setup(&pdev->dev);
return cmos_do_probe(&pdev->dev,
platform_get_resource(pdev, IORESOURCE_IO, 0),
@@ -1162,6 +1140,7 @@ static struct platform_driver cmos_platform_driver = {
#ifdef CONFIG_PM
.pm = &cmos_pm_ops,
#endif
+ .of_match_table = of_cmos_match,
}
};
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 34647fc1ee9..8d46838dff8 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -231,10 +231,6 @@ davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
case RTC_WIE_OFF:
rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN;
break;
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- ret = -ENOTTY;
- break;
default:
ret = -ENOIOCTLCMD;
}
@@ -473,55 +469,6 @@ static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
return 0;
}
-static int davinci_rtc_irq_set_state(struct device *dev, int enabled)
-{
- struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
- unsigned long flags;
- u8 rtc_ctrl;
-
- spin_lock_irqsave(&davinci_rtc_lock, flags);
-
- rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
-
- if (enabled) {
- while (rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL)
- & PRTCSS_RTC_CTRL_WDTBUS)
- cpu_relax();
-
- rtc_ctrl |= PRTCSS_RTC_CTRL_TE;
- rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-
- rtcss_write(davinci_rtc, 0x0, PRTCSS_RTC_CLKC_CNT);
-
- rtc_ctrl |= PRTCSS_RTC_CTRL_TIEN |
- PRTCSS_RTC_CTRL_TMMD |
- PRTCSS_RTC_CTRL_TMRFLG;
- } else
- rtc_ctrl &= ~PRTCSS_RTC_CTRL_TIEN;
-
- rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-
- spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
- return 0;
-}
-
-static int davinci_rtc_irq_set_freq(struct device *dev, int freq)
-{
- struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
- unsigned long flags;
- u16 tmr_counter = (0x8000 >> (ffs(freq) - 1));
-
- spin_lock_irqsave(&davinci_rtc_lock, flags);
-
- rtcss_write(davinci_rtc, tmr_counter & 0xFF, PRTCSS_RTC_TMR0);
- rtcss_write(davinci_rtc, (tmr_counter & 0xFF00) >> 8, PRTCSS_RTC_TMR1);
-
- spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
- return 0;
-}
-
static struct rtc_class_ops davinci_rtc_ops = {
.ioctl = davinci_rtc_ioctl,
.read_time = davinci_rtc_read_time,
@@ -529,8 +476,6 @@ static struct rtc_class_ops davinci_rtc_ops = {
.alarm_irq_enable = davinci_rtc_alarm_irq_enable,
.read_alarm = davinci_rtc_read_alarm,
.set_alarm = davinci_rtc_set_alarm,
- .irq_set_state = davinci_rtc_irq_set_state,
- .irq_set_freq = davinci_rtc_irq_set_freq,
};
static int __init davinci_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 37268e97de4..3fffd708711 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -397,29 +397,12 @@ static int ds1511_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds1511_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct platform_device *pdev = to_platform_device(dev);
- struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
- if (pdata->irq <= 0)
- return -EINVAL;
- if (enabled)
- pdata->irqen |= RTC_UF;
- else
- pdata->irqen &= ~RTC_UF;
- ds1511_rtc_update_alarm(pdata);
- return 0;
-}
-
static const struct rtc_class_ops ds1511_rtc_ops = {
.read_time = ds1511_rtc_read_time,
.set_time = ds1511_rtc_set_time,
.read_alarm = ds1511_rtc_read_alarm,
.set_alarm = ds1511_rtc_set_alarm,
.alarm_irq_enable = ds1511_rtc_alarm_irq_enable,
- .update_irq_enable = ds1511_rtc_update_irq_enable,
};
static ssize_t
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index ff432e2ca27..fee41b97c9e 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -227,29 +227,12 @@ static int ds1553_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds1553_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct platform_device *pdev = to_platform_device(dev);
- struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
- if (pdata->irq <= 0)
- return -EINVAL;
- if (enabled)
- pdata->irqen |= RTC_UF;
- else
- pdata->irqen &= ~RTC_UF;
- ds1553_rtc_update_alarm(pdata);
- return 0;
-}
-
static const struct rtc_class_ops ds1553_rtc_ops = {
.read_time = ds1553_rtc_read_time,
.set_time = ds1553_rtc_set_time,
.read_alarm = ds1553_rtc_read_alarm,
.set_alarm = ds1553_rtc_set_alarm,
.alarm_irq_enable = ds1553_rtc_alarm_irq_enable,
- .update_irq_enable = ds1553_rtc_update_irq_enable,
};
static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 950735415a7..27b7bf672ac 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -339,23 +339,6 @@ static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds3232_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct ds3232 *ds3232 = i2c_get_clientdata(client);
-
- if (client->irq <= 0)
- return -EINVAL;
-
- if (enabled)
- ds3232->rtc->irq_data |= RTC_UF;
- else
- ds3232->rtc->irq_data &= ~RTC_UF;
-
- ds3232_update_alarm(client);
- return 0;
-}
-
static irqreturn_t ds3232_irq(int irq, void *dev_id)
{
struct i2c_client *client = dev_id;
@@ -406,7 +389,6 @@ static const struct rtc_class_ops ds3232_rtc_ops = {
.read_alarm = ds3232_read_alarm,
.set_alarm = ds3232_set_alarm,
.alarm_irq_enable = ds3232_alarm_irq_enable,
- .update_irq_enable = ds3232_update_irq_enable,
};
static int __devinit ds3232_probe(struct i2c_client *client,
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 2e16f72c905..b6473631d18 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -168,12 +168,6 @@ static int jz4740_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
return ret;
}
-static int jz4740_rtc_update_irq_enable(struct device *dev, unsigned int enable)
-{
- struct jz4740_rtc *rtc = dev_get_drvdata(dev);
- return jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_1HZ_IRQ, enable);
-}
-
static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
{
struct jz4740_rtc *rtc = dev_get_drvdata(dev);
@@ -185,7 +179,6 @@ static struct rtc_class_ops jz4740_rtc_ops = {
.set_mmss = jz4740_rtc_set_mmss,
.read_alarm = jz4740_rtc_read_alarm,
.set_alarm = jz4740_rtc_set_alarm,
- .update_irq_enable = jz4740_rtc_update_irq_enable,
.alarm_irq_enable = jz4740_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 5314b153bfb..c4200646955 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -282,12 +282,6 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
return IRQ_HANDLED;
}
-static int mc13xxx_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- return mc13xxx_rtc_irq_enable(dev, enabled, MC13XXX_IRQ_1HZ);
-}
-
static int mc13xxx_rtc_alarm_irq_enable(struct device *dev,
unsigned int enabled)
{
@@ -300,7 +294,6 @@ static const struct rtc_class_ops mc13xxx_rtc_ops = {
.read_alarm = mc13xxx_rtc_read_alarm,
.set_alarm = mc13xxx_rtc_set_alarm,
.alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
- .update_irq_enable = mc13xxx_rtc_update_irq_enable,
};
static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index dfcdf0901d2..b40c1ff1ebc 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -240,32 +240,12 @@ static int mpc5121_rtc_alarm_irq_enable(struct device *dev,
return 0;
}
-static int mpc5121_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev);
- struct mpc5121_rtc_regs __iomem *regs = rtc->regs;
- int val;
-
- val = in_8(&regs->int_enable);
-
- if (enabled)
- val = (val & ~0x8) | 0x1;
- else
- val &= ~0x1;
-
- out_8(&regs->int_enable, val);
-
- return 0;
-}
-
static const struct rtc_class_ops mpc5121_rtc_ops = {
.read_time = mpc5121_rtc_read_time,
.set_time = mpc5121_rtc_set_time,
.read_alarm = mpc5121_rtc_read_alarm,
.set_alarm = mpc5121_rtc_set_alarm,
.alarm_irq_enable = mpc5121_rtc_alarm_irq_enable,
- .update_irq_enable = mpc5121_rtc_update_irq_enable,
};
static int __devinit mpc5121_rtc_probe(struct platform_device *op,
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 1db62db8469..b86bc328463 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -62,6 +62,17 @@ static inline int is_intr(u8 rtc_intr)
return rtc_intr & RTC_IRQMASK;
}
+static inline unsigned char vrtc_is_updating(void)
+{
+ unsigned char uip;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ uip = (vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return uip;
+}
+
/*
* rtc_time's year contains the increment over 1900, but vRTC's YEAR
* register can't be programmed to value larger than 0x64, so vRTC
@@ -76,7 +87,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
{
unsigned long flags;
- if (rtc_is_updating())
+ if (vrtc_is_updating())
mdelay(20);
spin_lock_irqsave(&rtc_lock, flags);
@@ -236,25 +247,6 @@ static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
-static int mrst_irq_set_state(struct device *dev, int enabled)
-{
- struct mrst_rtc *mrst = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!mrst->irq)
- return -ENXIO;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- mrst_irq_enable(mrst, RTC_PIE);
- else
- mrst_irq_disable(mrst, RTC_PIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
/* Currently, the vRTC doesn't support UIE ON/OFF */
static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -301,7 +293,6 @@ static const struct rtc_class_ops mrst_rtc_ops = {
.read_alarm = mrst_read_alarm,
.set_alarm = mrst_set_alarm,
.proc = mrst_procfs,
- .irq_set_state = mrst_irq_set_state,
.alarm_irq_enable = mrst_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 0b06c1e03fd..826ab64a8fa 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -274,12 +274,6 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int mxc_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- mxc_rtc_irq_enable(dev, RTC_1HZ_BIT, enabled);
- return 0;
-}
-
/*
* This function reads the current RTC time into tm in Gregorian date.
*/
@@ -368,7 +362,6 @@ static struct rtc_class_ops mxc_rtc_ops = {
.read_alarm = mxc_rtc_read_alarm,
.set_alarm = mxc_rtc_set_alarm,
.alarm_irq_enable = mxc_rtc_alarm_irq_enable,
- .update_irq_enable = mxc_rtc_update_irq_enable,
};
static int __init mxc_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index ddb0857e15a..781068d62f2 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -134,20 +134,6 @@ static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16;
}
-static int nuc900_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-
- if (enabled)
- __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)|
- (TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
- else
- __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)&
- (~TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
-
- return 0;
-}
-
static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct nuc900_rtc *rtc = dev_get_drvdata(dev);
@@ -234,7 +220,6 @@ static struct rtc_class_ops nuc900_rtc_ops = {
.read_alarm = nuc900_rtc_read_alarm,
.set_alarm = nuc900_rtc_set_alarm,
.alarm_irq_enable = nuc900_alarm_irq_enable,
- .update_irq_enable = nuc900_update_irq_enable,
};
static int __devinit nuc900_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index b4dbf3a319b..de0dd7b1f14 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -135,44 +135,6 @@ static irqreturn_t rtc_irq(int irq, void *rtc)
return IRQ_HANDLED;
}
-#ifdef CONFIG_RTC_INTF_DEV
-
-static int
-omap_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- u8 reg;
-
- switch (cmd) {
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- break;
- default:
- return -ENOIOCTLCMD;
- }
-
- local_irq_disable();
- rtc_wait_not_busy();
- reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
- switch (cmd) {
- /* UIE = Update Interrupt Enable (1/second) */
- case RTC_UIE_OFF:
- reg &= ~OMAP_RTC_INTERRUPTS_IT_TIMER;
- break;
- case RTC_UIE_ON:
- reg |= OMAP_RTC_INTERRUPTS_IT_TIMER;
- break;
- }
- rtc_wait_not_busy();
- rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
- local_irq_enable();
-
- return 0;
-}
-
-#else
-#define omap_rtc_ioctl NULL
-#endif
-
static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
u8 reg;
@@ -313,7 +275,6 @@ static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
}
static struct rtc_class_ops omap_rtc_ops = {
- .ioctl = omap_rtc_ioctl,
.read_time = omap_rtc_read_time,
.set_time = omap_rtc_set_time,
.read_alarm = omap_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index 25c0b3fd44f..a633abc4289 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -131,18 +131,12 @@ static int pcap_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en);
}
-static int pcap_rtc_update_irq_enable(struct device *dev, unsigned int en)
-{
- return pcap_rtc_irq_enable(dev, PCAP_IRQ_1HZ, en);
-}
-
static const struct rtc_class_ops pcap_rtc_ops = {
.read_time = pcap_rtc_read_time,
.read_alarm = pcap_rtc_read_alarm,
.set_alarm = pcap_rtc_set_alarm,
.set_mmss = pcap_rtc_set_mmss,
.alarm_irq_enable = pcap_rtc_alarm_irq_enable,
- .update_irq_enable = pcap_rtc_update_irq_enable,
};
static int __devinit pcap_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 16edf94ab42..f90c574f9d0 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -106,25 +106,6 @@ pcf50633_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int
-pcf50633_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct pcf50633_rtc *rtc = dev_get_drvdata(dev);
- int err;
-
- if (enabled)
- err = pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_SECOND);
- else
- err = pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_SECOND);
-
- if (err < 0)
- return err;
-
- rtc->second_enabled = enabled;
-
- return 0;
-}
-
static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct pcf50633_rtc *rtc;
@@ -262,8 +243,7 @@ static struct rtc_class_ops pcf50633_rtc_ops = {
.set_time = pcf50633_rtc_set_time,
.read_alarm = pcf50633_rtc_read_alarm,
.set_alarm = pcf50633_rtc_set_alarm,
- .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
- .update_irq_enable = pcf50633_rtc_update_irq_enable,
+ .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
};
static void pcf50633_rtc_irq(int irq, void *data)
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index bbdb2f02798..d554368c9f5 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -35,11 +35,6 @@ static irqreturn_t pl030_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int pl030_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- return -ENOIOCTLCMD;
-}
-
static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct pl030_rtc *rtc = dev_get_drvdata(dev);
@@ -96,7 +91,6 @@ static int pl030_set_time(struct device *dev, struct rtc_time *tm)
}
static const struct rtc_class_ops pl030_ops = {
- .ioctl = pl030_ioctl,
.read_time = pl030_read_time,
.set_time = pl030_set_time,
.read_alarm = pl030_read_alarm,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index b7a6690e5b3..d829ea63c4f 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -293,57 +293,6 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
return ret;
}
-/* Periodic interrupt is only available in ST variants. */
-static int pl031_irq_set_state(struct device *dev, int enabled)
-{
- struct pl031_local *ldata = dev_get_drvdata(dev);
-
- if (enabled == 1) {
- /* Clear any pending timer interrupt. */
- writel(RTC_BIT_PI, ldata->base + RTC_ICR);
-
- writel(readl(ldata->base + RTC_IMSC) | RTC_BIT_PI,
- ldata->base + RTC_IMSC);
-
- /* Now start the timer */
- writel(readl(ldata->base + RTC_TCR) | RTC_TCR_EN,
- ldata->base + RTC_TCR);
-
- } else {
- writel(readl(ldata->base + RTC_IMSC) & (~RTC_BIT_PI),
- ldata->base + RTC_IMSC);
-
- /* Also stop the timer */
- writel(readl(ldata->base + RTC_TCR) & (~RTC_TCR_EN),
- ldata->base + RTC_TCR);
- }
- /* Wait at least 1 RTC32 clock cycle to ensure next access
- * to RTC_TCR will succeed.
- */
- udelay(40);
-
- return 0;
-}
-
-static int pl031_irq_set_freq(struct device *dev, int freq)
-{
- struct pl031_local *ldata = dev_get_drvdata(dev);
-
- /* Cant set timer if it is already enabled */
- if (readl(ldata->base + RTC_TCR) & RTC_TCR_EN) {
- dev_err(dev, "can't change frequency while timer enabled\n");
- return -EINVAL;
- }
-
- /* If self start bit in RTC_TCR is set timer will start here,
- * but we never set that bit. Instead we start the timer when
- * set_state is called with enabled == 1.
- */
- writel(RTC_TIMER_FREQ / freq, ldata->base + RTC_TLR);
-
- return 0;
-}
-
static int pl031_remove(struct amba_device *adev)
{
struct pl031_local *ldata = dev_get_drvdata(&adev->dev);
@@ -440,8 +389,6 @@ static struct rtc_class_ops stv1_pl031_ops = {
.read_alarm = pl031_read_alarm,
.set_alarm = pl031_set_alarm,
.alarm_irq_enable = pl031_alarm_irq_enable,
- .irq_set_state = pl031_irq_set_state,
- .irq_set_freq = pl031_irq_set_freq,
};
/* And the second ST derivative */
@@ -451,8 +398,6 @@ static struct rtc_class_ops stv2_pl031_ops = {
.read_alarm = pl031_stv2_read_alarm,
.set_alarm = pl031_stv2_set_alarm,
.alarm_irq_enable = pl031_alarm_irq_enable,
- .irq_set_state = pl031_irq_set_state,
- .irq_set_freq = pl031_irq_set_freq,
};
static struct amba_id pl031_ids[] = {
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 242bbf86c74..0a59fda5c09 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -69,6 +69,14 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
alrm.enabled ? "yes" : "no");
seq_printf(seq, "alrm_pending\t: %s\n",
alrm.pending ? "yes" : "no");
+ seq_printf(seq, "update IRQ enabled\t: %s\n",
+ (rtc->uie_rtctimer.enabled) ? "yes" : "no");
+ seq_printf(seq, "periodic IRQ enabled\t: %s\n",
+ (rtc->pie_enabled) ? "yes" : "no");
+ seq_printf(seq, "periodic IRQ frequency\t: %d\n",
+ rtc->irq_freq);
+ seq_printf(seq, "max user IRQ frequency\t: %d\n",
+ rtc->max_user_freq);
}
seq_printf(seq, "24hr\t\t: yes\n");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 29e867a1aaa..fc9f4991574 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -209,32 +209,6 @@ static void pxa_rtc_release(struct device *dev)
free_irq(pxa_rtc->irq_1Hz, dev);
}
-static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
- int period_ms;
-
- if (freq < 1 || freq > MAXFREQ_PERIODIC)
- return -EINVAL;
-
- period_ms = 1000 / freq;
- rtc_writel(pxa_rtc, PIAR, period_ms);
-
- return 0;
-}
-
-static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
-
- if (enabled)
- rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
- else
- rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
-
- return 0;
-}
-
static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -250,21 +224,6 @@ static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int pxa_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
-
- spin_lock_irq(&pxa_rtc->lock);
-
- if (enabled)
- rtsr_set_bits(pxa_rtc, RTSR_HZE);
- else
- rtsr_clear_bits(pxa_rtc, RTSR_HZE);
-
- spin_unlock_irq(&pxa_rtc->lock);
- return 0;
-}
-
static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -346,10 +305,7 @@ static const struct rtc_class_ops pxa_rtc_ops = {
.read_alarm = pxa_rtc_read_alarm,
.set_alarm = pxa_rtc_set_alarm,
.alarm_irq_enable = pxa_alarm_irq_enable,
- .update_irq_enable = pxa_update_irq_enable,
.proc = pxa_rtc_proc,
- .irq_set_state = pxa_periodic_irq_set_state,
- .irq_set_freq = pxa_periodic_irq_set_freq,
};
static int __init pxa_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 6aaa1550e3b..85c1b848dd7 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -281,57 +281,6 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
return rs5c372_set_datetime(to_i2c_client(dev), tm);
}
-#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
-
-static int
-rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct rs5c372 *rs5c = i2c_get_clientdata(client);
- unsigned char buf;
- int status, addr;
-
- buf = rs5c->regs[RS5C_REG_CTRL1];
- switch (cmd) {
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- /* some 327a modes use a different IRQ pin for 1Hz irqs */
- if (rs5c->type == rtc_rs5c372a
- && (buf & RS5C372A_CTRL1_SL1))
- return -ENOIOCTLCMD;
- default:
- return -ENOIOCTLCMD;
- }
-
- status = rs5c_get_regs(rs5c);
- if (status < 0)
- return status;
-
- addr = RS5C_ADDR(RS5C_REG_CTRL1);
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- buf &= ~RS5C_CTRL1_CT_MASK;
- break;
- case RTC_UIE_ON: /* update on */
- buf &= ~RS5C_CTRL1_CT_MASK;
- buf |= RS5C_CTRL1_CT4;
- break;
- }
-
- if (i2c_smbus_write_byte_data(client, addr, buf) < 0) {
- printk(KERN_WARNING "%s: can't update alarm\n",
- rs5c->rtc->name);
- status = -EIO;
- } else
- rs5c->regs[RS5C_REG_CTRL1] = buf;
-
- return status;
-}
-
-#else
-#define rs5c_rtc_ioctl NULL
-#endif
-
static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -480,7 +429,6 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
static const struct rtc_class_ops rs5c372_rtc_ops = {
.proc = rs5c372_rtc_proc,
- .ioctl = rs5c_rtc_ioctl,
.read_time = rs5c372_rtc_read_time,
.set_time = rs5c372_rtc_set_time,
.read_alarm = rs5c_read_alarm,
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index af32a62e12a..fde172fb2ab 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -424,37 +424,12 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int rx8025_irq_set_state(struct device *dev, int enabled)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct rx8025_data *rx8025 = i2c_get_clientdata(client);
- int ctrl1;
- int err;
-
- if (client->irq <= 0)
- return -ENXIO;
-
- ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT;
- if (enabled)
- ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ;
- if (ctrl1 != rx8025->ctrl1) {
- rx8025->ctrl1 = ctrl1;
- err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
- rx8025->ctrl1);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static struct rtc_class_ops rx8025_rtc_ops = {
.read_time = rx8025_get_time,
.set_time = rx8025_set_time,
.read_alarm = rx8025_read_alarm,
.set_alarm = rx8025_set_alarm,
.alarm_irq_enable = rx8025_alarm_irq_enable,
- .irq_set_state = rx8025_irq_set_state,
};
/*
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index b80fa288240..714964913e5 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -93,37 +93,6 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
return 0;
}
-static int s3c_rtc_setpie(struct device *dev, int enabled)
-{
- unsigned int tmp;
-
- pr_debug("%s: pie=%d\n", __func__, enabled);
-
- spin_lock_irq(&s3c_rtc_pie_lock);
-
- if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
- tmp = readw(s3c_rtc_base + S3C2410_RTCCON);
- tmp &= ~S3C64XX_RTCCON_TICEN;
-
- if (enabled)
- tmp |= S3C64XX_RTCCON_TICEN;
-
- writew(tmp, s3c_rtc_base + S3C2410_RTCCON);
- } else {
- tmp = readb(s3c_rtc_base + S3C2410_TICNT);
- tmp &= ~S3C2410_TICNT_ENABLE;
-
- if (enabled)
- tmp |= S3C2410_TICNT_ENABLE;
-
- writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
- }
-
- spin_unlock_irq(&s3c_rtc_pie_lock);
-
- return 0;
-}
-
static int s3c_rtc_setfreq(struct device *dev, int freq)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -379,8 +348,6 @@ static const struct rtc_class_ops s3c_rtcops = {
.set_time = s3c_rtc_settime,
.read_alarm = s3c_rtc_getalarm,
.set_alarm = s3c_rtc_setalarm,
- .irq_set_freq = s3c_rtc_setfreq,
- .irq_set_state = s3c_rtc_setpie,
.proc = s3c_rtc_proc,
.alarm_irq_enable = s3c_rtc_setaie,
};
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 5dfe5ffcb0d..0b40bb88a88 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -43,7 +43,6 @@
#define RTC_DEF_TRIM 0
static const unsigned long RTC_FREQ = 1024;
-static unsigned long timer_freq;
static struct rtc_time rtc_alarm;
static DEFINE_SPINLOCK(sa1100_rtc_lock);
@@ -156,114 +155,11 @@ static irqreturn_t sa1100_rtc_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int sa1100_irq_set_freq(struct device *dev, int freq)
-{
- if (freq < 1 || freq > timer_freq) {
- return -EINVAL;
- } else {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- rtc->irq_freq = freq;
-
- return 0;
- }
-}
-
-static int rtc_timer1_count;
-
-static int sa1100_irq_set_state(struct device *dev, int enabled)
-{
- spin_lock_irq(&sa1100_rtc_lock);
- if (enabled) {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- OSMR1 = timer_freq / rtc->irq_freq + OSCR;
- OIER |= OIER_E1;
- rtc_timer1_count = 1;
- } else {
- OIER &= ~OIER_E1;
- }
- spin_unlock_irq(&sa1100_rtc_lock);
-
- return 0;
-}
-
-static inline int sa1100_timer1_retrigger(struct rtc_device *rtc)
-{
- unsigned long diff;
- unsigned long period = timer_freq / rtc->irq_freq;
-
- spin_lock_irq(&sa1100_rtc_lock);
-
- do {
- OSMR1 += period;
- diff = OSMR1 - OSCR;
- /* If OSCR > OSMR1, diff is a very large number (unsigned
- * math). This means we have a lost interrupt. */
- } while (diff > period);
- OIER |= OIER_E1;
-
- spin_unlock_irq(&sa1100_rtc_lock);
-
- return 0;
-}
-
-static irqreturn_t timer1_interrupt(int irq, void *dev_id)
-{
- struct platform_device *pdev = to_platform_device(dev_id);
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- /*
- * If we match for the first time, rtc_timer1_count will be 1.
- * Otherwise, we wrapped around (very unlikely but
- * still possible) so compute the amount of missed periods.
- * The match reg is updated only when the data is actually retrieved
- * to avoid unnecessary interrupts.
- */
- OSSR = OSSR_M1; /* clear match on timer1 */
-
- rtc_update_irq(rtc, rtc_timer1_count, RTC_PF | RTC_IRQF);
-
- if (rtc_timer1_count == 1)
- rtc_timer1_count =
- (rtc->irq_freq * ((1 << 30) / (timer_freq >> 2)));
-
- /* retrigger. */
- sa1100_timer1_retrigger(rtc);
-
- return IRQ_HANDLED;
-}
-
-static int sa1100_rtc_read_callback(struct device *dev, int data)
-{
- if (data & RTC_PF) {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- /* interpolate missed periods and set match for the next */
- unsigned long period = timer_freq / rtc->irq_freq;
- unsigned long oscr = OSCR;
- unsigned long osmr1 = OSMR1;
- unsigned long missed = (oscr - osmr1)/period;
- data += missed << 8;
- OSSR = OSSR_M1; /* clear match on timer 1 */
- OSMR1 = osmr1 + (missed + 1)*period;
- /* Ensure we didn't miss another match in the mean time.
- * Here we compare (match - OSCR) 8 instead of 0 --
- * see comment in pxa_timer_interrupt() for explanation.
- */
- while ((signed long)((osmr1 = OSMR1) - OSCR) <= 8) {
- data += 0x100;
- OSSR = OSSR_M1; /* clear match on timer 1 */
- OSMR1 = osmr1 + period;
- }
- }
- return data;
-}
-
static int sa1100_rtc_open(struct device *dev)
{
int ret;
- struct rtc_device *rtc = (struct rtc_device *)dev;
+ struct platform_device *plat_dev = to_platform_device(dev);
+ struct rtc_device *rtc = platform_get_drvdata(plat_dev);
ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED,
"rtc 1Hz", dev);
@@ -277,19 +173,11 @@ static int sa1100_rtc_open(struct device *dev)
dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm);
goto fail_ai;
}
- ret = request_irq(IRQ_OST1, timer1_interrupt, IRQF_DISABLED,
- "rtc timer", dev);
- if (ret) {
- dev_err(dev, "IRQ %d already in use.\n", IRQ_OST1);
- goto fail_pi;
- }
rtc->max_user_freq = RTC_FREQ;
- sa1100_irq_set_freq(dev, RTC_FREQ);
+ rtc_irq_set_freq(rtc, NULL, RTC_FREQ);
return 0;
- fail_pi:
- free_irq(IRQ_RTCAlrm, dev);
fail_ai:
free_irq(IRQ_RTC1Hz, dev);
fail_ui:
@@ -304,30 +192,10 @@ static void sa1100_rtc_release(struct device *dev)
OSSR = OSSR_M1;
spin_unlock_irq(&sa1100_rtc_lock);
- free_irq(IRQ_OST1, dev);
free_irq(IRQ_RTCAlrm, dev);
free_irq(IRQ_RTC1Hz, dev);
}
-
-static int sa1100_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- switch (cmd) {
- case RTC_UIE_OFF:
- spin_lock_irq(&sa1100_rtc_lock);
- RTSR &= ~RTSR_HZE;
- spin_unlock_irq(&sa1100_rtc_lock);
- return 0;
- case RTC_UIE_ON:
- spin_lock_irq(&sa1100_rtc_lock);
- RTSR |= RTSR_HZE;
- spin_unlock_irq(&sa1100_rtc_lock);
- return 0;
- }
- return -ENOIOCTLCMD;
-}
-
static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
spin_lock_irq(&sa1100_rtc_lock);
@@ -386,31 +254,20 @@ static int sa1100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq)
{
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- seq_printf(seq, "trim/divider\t: 0x%08x\n", (u32) RTTR);
- seq_printf(seq, "update_IRQ\t: %s\n",
- (RTSR & RTSR_HZE) ? "yes" : "no");
- seq_printf(seq, "periodic_IRQ\t: %s\n",
- (OIER & OIER_E1) ? "yes" : "no");
- seq_printf(seq, "periodic_freq\t: %d\n", rtc->irq_freq);
- seq_printf(seq, "RTSR\t\t: 0x%08x\n", (u32)RTSR);
+ seq_printf(seq, "trim/divider\t\t: 0x%08x\n", (u32) RTTR);
+ seq_printf(seq, "RTSR\t\t\t: 0x%08x\n", (u32)RTSR);
return 0;
}
static const struct rtc_class_ops sa1100_rtc_ops = {
.open = sa1100_rtc_open,
- .read_callback = sa1100_rtc_read_callback,
.release = sa1100_rtc_release,
- .ioctl = sa1100_rtc_ioctl,
.read_time = sa1100_rtc_read_time,
.set_time = sa1100_rtc_set_time,
.read_alarm = sa1100_rtc_read_alarm,
.set_alarm = sa1100_rtc_set_alarm,
.proc = sa1100_rtc_proc,
- .irq_set_freq = sa1100_irq_set_freq,
- .irq_set_state = sa1100_irq_set_state,
.alarm_irq_enable = sa1100_rtc_alarm_irq_enable,
};
@@ -418,8 +275,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
{
struct rtc_device *rtc;
- timer_freq = get_clock_tick_rate();
-
/*
* According to the manual we should be able to let RTTR be zero
* and then a default diviser for a 32.768KHz clock is used.
@@ -445,11 +300,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, rtc);
- /* Set the irq_freq */
- /*TODO: Find out who is messing with this value after we initialize
- * it here.*/
- rtc->irq_freq = RTC_FREQ;
-
/* Fix for a nasty initialization problem the in SA11xx RTSR register.
* See also the comments in sa1100_rtc_interrupt().
*
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 93314a9e7fa..e55dc1ac83a 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -344,27 +344,6 @@ static inline void sh_rtc_setcie(struct device *dev, unsigned int enable)
spin_unlock_irq(&rtc->lock);
}
-static int sh_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct sh_rtc *rtc = dev_get_drvdata(dev);
- unsigned int ret = 0;
-
- switch (cmd) {
- case RTC_UIE_OFF:
- rtc->periodic_freq &= ~PF_OXS;
- sh_rtc_setcie(dev, 0);
- break;
- case RTC_UIE_ON:
- rtc->periodic_freq |= PF_OXS;
- sh_rtc_setcie(dev, 1);
- break;
- default:
- ret = -ENOIOCTLCMD;
- }
-
- return ret;
-}
-
static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
sh_rtc_setaie(dev, enabled);
@@ -598,13 +577,10 @@ static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
}
static struct rtc_class_ops sh_rtc_ops = {
- .ioctl = sh_rtc_ioctl,
.read_time = sh_rtc_read_time,
.set_time = sh_rtc_set_time,
.read_alarm = sh_rtc_read_alarm,
.set_alarm = sh_rtc_set_alarm,
- .irq_set_state = sh_rtc_irq_set_state,
- .irq_set_freq = sh_rtc_irq_set_freq,
.proc = sh_rtc_proc,
.alarm_irq_enable = sh_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 7e7d0c806f2..572e9534b59 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -115,19 +115,6 @@ static int stmp3xxx_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int stmp3xxx_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
-
- if (enabled)
- stmp3xxx_setl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
- rtc_data->io + HW_RTC_CTRL);
- else
- stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
- rtc_data->io + HW_RTC_CTRL);
- return 0;
-}
-
static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
{
struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
@@ -149,8 +136,6 @@ static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
static struct rtc_class_ops stmp3xxx_rtc_ops = {
.alarm_irq_enable =
stmp3xxx_alarm_irq_enable,
- .update_irq_enable =
- stmp3xxx_update_irq_enable,
.read_time = stmp3xxx_rtc_gettime,
.set_mmss = stmp3xxx_rtc_set_mmss,
.read_alarm = stmp3xxx_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index a82d6fe9707..7e96254bd36 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -78,11 +78,16 @@ static ssize_t test_irq_store(struct device *dev,
struct rtc_device *rtc = platform_get_drvdata(plat_dev);
retval = count;
- if (strncmp(buf, "tick", 4) == 0)
+ if (strncmp(buf, "tick", 4) == 0 && rtc->pie_enabled)
rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF);
- else if (strncmp(buf, "alarm", 5) == 0)
- rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
- else if (strncmp(buf, "update", 6) == 0)
+ else if (strncmp(buf, "alarm", 5) == 0) {
+ struct rtc_wkalrm alrm;
+ int err = rtc_read_alarm(rtc, &alrm);
+
+ if (!err && alrm.enabled)
+ rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
+
+ } else if (strncmp(buf, "update", 6) == 0 && rtc->uie_rtctimer.enabled)
rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF);
else
retval = -EINVAL;
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index ed1b8682812..f9a2799c44d 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -213,18 +213,6 @@ static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
return ret;
}
-static int twl_rtc_update_irq_enable(struct device *dev, unsigned enabled)
-{
- int ret;
-
- if (enabled)
- ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
- else
- ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-
- return ret;
-}
-
/*
* Gets current TWL RTC time and date parameters.
*
@@ -433,7 +421,6 @@ static struct rtc_class_ops twl_rtc_ops = {
.read_alarm = twl_rtc_read_alarm,
.set_alarm = twl_rtc_set_alarm,
.alarm_irq_enable = twl_rtc_alarm_irq_enable,
- .update_irq_enable = twl_rtc_update_irq_enable,
};
/*----------------------------------------------------------------------*/
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 769190ac6d1..c5698cda366 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -207,36 +207,6 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
return 0;
}
-static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
-{
- u64 count;
-
- if (!is_power_of_2(freq))
- return -EINVAL;
- count = RTC_FREQUENCY;
- do_div(count, freq);
-
- spin_lock_irq(&rtc_lock);
-
- periodic_count = count;
- rtc1_write(RTCL1LREG, periodic_count);
- rtc1_write(RTCL1HREG, periodic_count >> 16);
-
- spin_unlock_irq(&rtc_lock);
-
- return 0;
-}
-
-static int vr41xx_rtc_irq_set_state(struct device *dev, int enabled)
-{
- if (enabled)
- enable_irq(pie_irq);
- else
- disable_irq(pie_irq);
-
- return 0;
-}
-
static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -308,8 +278,6 @@ static const struct rtc_class_ops vr41xx_rtc_ops = {
.set_time = vr41xx_rtc_set_time,
.read_alarm = vr41xx_rtc_read_alarm,
.set_alarm = vr41xx_rtc_set_alarm,
- .irq_set_freq = vr41xx_rtc_irq_set_freq,
- .irq_set_state = vr41xx_rtc_irq_set_state,
};
static int __devinit rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 82931dc65c0..bdc909bd56d 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -315,21 +315,6 @@ static int wm831x_rtc_alarm_irq_enable(struct device *dev,
return wm831x_rtc_stop_alarm(wm831x_rtc);
}
-static int wm831x_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct wm831x_rtc *wm831x_rtc = dev_get_drvdata(dev);
- int val;
-
- if (enabled)
- val = 1 << WM831X_RTC_PINT_FREQ_SHIFT;
- else
- val = 0;
-
- return wm831x_set_bits(wm831x_rtc->wm831x, WM831X_RTC_CONTROL,
- WM831X_RTC_PINT_FREQ_MASK, val);
-}
-
static irqreturn_t wm831x_alm_irq(int irq, void *data)
{
struct wm831x_rtc *wm831x_rtc = data;
@@ -354,7 +339,6 @@ static const struct rtc_class_ops wm831x_rtc_ops = {
.read_alarm = wm831x_rtc_readalarm,
.set_alarm = wm831x_rtc_setalarm,
.alarm_irq_enable = wm831x_rtc_alarm_irq_enable,
- .update_irq_enable = wm831x_rtc_update_irq_enable,
};
#ifdef CONFIG_PM
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 3d0dc76b38a..66421426e40 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -302,26 +302,6 @@ static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return ret;
}
-static int wm8350_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct wm8350 *wm8350 = dev_get_drvdata(dev);
-
- /* Suppress duplicate changes since genirq nests enable and
- * disable calls. */
- if (enabled == wm8350->rtc.update_enabled)
- return 0;
-
- if (enabled)
- wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC);
- else
- wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
-
- wm8350->rtc.update_enabled = enabled;
-
- return 0;
-}
-
static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data)
{
struct wm8350 *wm8350 = data;
@@ -357,7 +337,6 @@ static const struct rtc_class_ops wm8350_rtc_ops = {
.read_alarm = wm8350_rtc_readalarm,
.set_alarm = wm8350_rtc_setalarm,
.alarm_irq_enable = wm8350_rtc_alarm_irq_enable,
- .update_irq_enable = wm8350_rtc_update_irq_enable,
};
#ifdef CONFIG_PM
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 95928833855..a429b01d028 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1557,9 +1557,7 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
drv_data->ssp = ssp;
master->dev.parent = &pdev->dev;
-#ifdef CONFIG_OF
master->dev.of_node = pdev->dev.of_node;
-#endif
/* the spi->mode bits understood by this driver: */
master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
diff --git a/drivers/spi/pxa2xx_spi_pci.c b/drivers/spi/pxa2xx_spi_pci.c
index 19752b09e15..378e504f89e 100644
--- a/drivers/spi/pxa2xx_spi_pci.c
+++ b/drivers/spi/pxa2xx_spi_pci.c
@@ -89,9 +89,7 @@ static int __devinit ce4100_spi_probe(struct pci_dev *dev,
goto err_nomem;
pdev->dev.parent = &dev->dev;
-#ifdef CONFIG_OF
pdev->dev.of_node = dev->dev.of_node;
-#endif
ssp = &spi_info->ssp;
ssp->phys_base = pci_resource_start(dev, 0);
ssp->mmio_base = ioremap(phys_beg, phys_len);
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 7adaef62a99..4d2c75df886 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -351,14 +351,12 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#ifdef CONFIG_OF
static const struct of_device_id xilinx_spi_of_match[] = {
{ .compatible = "xlnx,xps-spi-2.00.a", },
{ .compatible = "xlnx,xps-spi-2.00.b", },
{}
};
MODULE_DEVICE_TABLE(of, xilinx_spi_of_match);
-#endif
struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
u32 irq, s16 bus_num, int num_cs, int little_endian, int bits_per_word)
@@ -394,9 +392,7 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
master->bus_num = bus_num;
master->num_chipselect = num_cs;
-#ifdef CONFIG_OF
master->dev.of_node = dev->of_node;
-#endif
xspi->mem = *mem;
xspi->irq = irq;
@@ -539,9 +535,7 @@ static struct platform_driver xilinx_spi_driver = {
.driver = {
.name = XILINX_SPI_NAME,
.owner = THIS_MODULE,
-#ifdef CONFIG_OF
.of_match_table = xilinx_spi_of_match,
-#endif
},
};
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 158cecbec71..4a109835e42 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -282,6 +282,9 @@ int core_tmr_lun_reset(
atomic_set(&task->task_active, 0);
atomic_set(&task->task_stop, 0);
+ } else {
+ if (atomic_read(&task->task_execute_queue) != 0)
+ transport_remove_task_from_execute_queue(task, dev);
}
__transport_stop_task_timer(task, &flags);
@@ -301,6 +304,7 @@ int core_tmr_lun_reset(
DEBUG_LR("LUN_RESET: got t_transport_active = 1 for"
" task: %p, t_fe_count: %d dev: %p\n", task,
fe_count, dev);
+ atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock,
flags);
core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -310,6 +314,7 @@ int core_tmr_lun_reset(
}
DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p,"
" t_fe_count: %d dev: %p\n", task, fe_count, dev);
+ atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags);
core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 236e22d8cfa..4bbf6c147f8 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1207,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev)
*
*
*/
-static void transport_remove_task_from_execute_queue(
+void transport_remove_task_from_execute_queue(
struct se_task *task,
struct se_device *dev)
{
@@ -5549,7 +5549,8 @@ static void transport_generic_wait_for_tasks(
atomic_set(&T_TASK(cmd)->transport_lun_stop, 0);
}
- if (!atomic_read(&T_TASK(cmd)->t_transport_active))
+ if (!atomic_read(&T_TASK(cmd)->t_transport_active) ||
+ atomic_read(&T_TASK(cmd)->t_transport_aborted))
goto remove;
atomic_set(&T_TASK(cmd)->t_transport_stop, 1);
@@ -5956,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev)
atomic_set(&task->task_active, 0);
atomic_set(&task->task_stop, 0);
+ } else {
+ if (atomic_read(&task->task_execute_queue) != 0)
+ transport_remove_task_from_execute_queue(task, dev);
}
__transport_stop_task_timer(task, &flags);
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index eca855a55c0..3de4ba0260a 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op)
struct cpwd *p = dev_get_drvdata(&op->dev);
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < WD_NUMDEVS; i++) {
misc_deregister(&p->devs[i].misc);
if (!p->enabled) {
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 24b966d5061..204a5603c4a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
return 0;
}
-static void __devexit hpwdt_exit_nmi_decoding(void)
+static void hpwdt_exit_nmi_decoding(void)
{
unregister_die_notifier(&die_notifier);
if (cru_rom_addr)
@@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
return 0;
}
-static void __devexit hpwdt_exit_nmi_decoding(void)
+static void hpwdt_exit_nmi_decoding(void)
{
}
#endif /* CONFIG_HPWDT_NMI_DECODING */
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index c7d67e9a746..79906255eeb 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -201,11 +201,14 @@ static struct miscdevice fitpc2_wdt_miscdev = {
static int __init fitpc2_wdt_init(void)
{
int err;
+ const char *brd_name;
- if (!strstr(dmi_get_system_info(DMI_BOARD_NAME), "SBC-FITPC2"))
+ brd_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+ if (!brd_name || !strstr(brd_name, "SBC-FITPC2"))
return -ENODEV;
- pr_info("%s found\n", dmi_get_system_info(DMI_BOARD_NAME));
+ pr_info("%s found\n", brd_name);
if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) {
pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT);
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 0461858e07d..b61ab1c5429 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
/* Check if Logical Device Register is currently active */
- if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0)
+ if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0)
printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
/* Get the base address of the runtime registers */
diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
index a6c12dec91a..df2a64dc967 100644
--- a/drivers/watchdog/w83697ug_wdt.c
+++ b/drivers/watchdog/w83697ug_wdt.c
@@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void)
outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
outb_p(0x30, WDT_EFER); /* select CR30 */
c = inb_p(WDT_EFDR);
- outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
+ outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
return 0;
}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02c7db..718050ace08 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
set_phys_to_machine(pfn, frame_list[i]);
/* Link back into the page tables if not highmem. */
- if (pfn < max_low_pfn) {
+ if (!xen_hvm_domain() && pfn < max_low_pfn) {
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
scrub_page(page);
- if (!PageHighMem(page)) {
+ if (!xen_hvm_domain() && !PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
__pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(pfn_to_page(pfn));
}
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
static int __init balloon_init(void)
{
- unsigned long pfn, extra_pfn_end;
+ unsigned long pfn, nr_pages, extra_pfn_end;
struct page *page;
- if (!xen_pv_domain())
+ if (!xen_domain())
return -ENODEV;
pr_info("xen_balloon: Initialising balloon driver.\n");
- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ if (xen_pv_domain())
+ nr_pages = xen_start_info->nr_pages;
+ else
+ nr_pages = max_pfn;
+ balloon_stats.current_pages = min(nr_pages, max_pfn);
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74681478100..0ad1699a1b3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
static __initdata struct cpu_evtchn_s init_evtchn_mask = {
.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
};
-static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
+ cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif
clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
- cpumask_copy(desc->affinity, cpumask_of(0));
+ cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
}
#endif
@@ -376,81 +376,69 @@ static void unmask_evtchn(int port)
put_cpu();
}
-static int get_nr_hw_irqs(void)
+static int xen_allocate_irq_dynamic(void)
{
- int ret = 1;
+ int first = 0;
+ int irq;
#ifdef CONFIG_X86_IO_APIC
- ret = get_nr_irqs_gsi();
+ /*
+ * For an HVM guest or domain 0 which see "real" (emulated or
+ * actual repectively) GSIs we allocate dynamic IRQs
+ * e.g. those corresponding to event channels or MSIs
+ * etc. from the range above those "real" GSIs to avoid
+ * collisions.
+ */
+ if (xen_initial_domain() || xen_hvm_domain())
+ first = get_nr_irqs_gsi();
#endif
- return ret;
-}
+retry:
+ irq = irq_alloc_desc_from(first, -1);
-static int find_unbound_pirq(int type)
-{
- int rc, i;
- struct physdev_get_free_pirq op_get_free_pirq;
- op_get_free_pirq.type = type;
+ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
+ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
+ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
+ goto retry;
+ }
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- if (!rc)
- return op_get_free_pirq.pirq;
+ if (irq < 0)
+ panic("No available IRQ to bind to: increase nr_irqs!\n");
- for (i = 0; i < nr_irqs; i++) {
- if (pirq_to_irq[i] < 0)
- return i;
- }
- return -1;
+ return irq;
}
-static int find_unbound_irq(void)
+static int xen_allocate_irq_gsi(unsigned gsi)
{
- struct irq_data *data;
- int irq, res;
- int bottom = get_nr_hw_irqs();
- int top = nr_irqs-1;
-
- if (bottom == nr_irqs)
- goto no_irqs;
+ int irq;
- /* This loop starts from the top of IRQ space and goes down.
- * We need this b/c if we have a PCI device in a Xen PV guest
- * we do not have an IO-APIC (though the backend might have them)
- * mapped in. To not have a collision of physical IRQs with the Xen
- * event channels start at the top of the IRQ space for virtual IRQs.
+ /*
+ * A PV guest has no concept of a GSI (since it has no ACPI
+ * nor access to/knowledge of the physical APICs). Therefore
+ * all IRQs are dynamically allocated from the entire IRQ
+ * space.
*/
- for (irq = top; irq > bottom; irq--) {
- data = irq_get_irq_data(irq);
- /* only 15->0 have init'd desc; handle irq > 16 */
- if (!data)
- break;
- if (data->chip == &no_irq_chip)
- break;
- if (data->chip != &xen_dynamic_chip)
- continue;
- if (irq_info[irq].type == IRQT_UNBOUND)
- return irq;
- }
-
- if (irq == bottom)
- goto no_irqs;
+ if (xen_pv_domain() && !xen_initial_domain())
+ return xen_allocate_irq_dynamic();
- res = irq_alloc_desc_at(irq, -1);
+ /* Legacy IRQ descriptors are already allocated by the arch. */
+ if (gsi < NR_IRQS_LEGACY)
+ return gsi;
- if (WARN_ON(res != irq))
- return -1;
+ irq = irq_alloc_desc_at(gsi, -1);
+ if (irq < 0)
+ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
return irq;
-
-no_irqs:
- panic("No available IRQ to bind to: increase nr_irqs!\n");
}
-static bool identity_mapped_irq(unsigned irq)
+static void xen_free_irq(unsigned irq)
{
- /* identity map all the hardware irqs */
- return irq < get_nr_hw_irqs();
+ /* Legacy IRQ descriptors are managed by the arch. */
+ if (irq < NR_IRQS_LEGACY)
+ return;
+
+ irq_free_desc(irq);
}
static void pirq_unmask_notify(int irq)
@@ -486,7 +474,7 @@ static bool probing_irq(int irq)
return desc && desc->action == NULL;
}
-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(unsigned int irq)
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
@@ -524,9 +512,15 @@ out:
return 0;
}
-static void shutdown_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
+{
+ return __startup_pirq(data->irq);
+}
+
+static void shutdown_pirq(struct irq_data *data)
{
struct evtchn_close close;
+ unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
int evtchn = evtchn_from_irq(irq);
@@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
info->evtchn = 0;
}
-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
{
- startup_pirq(irq);
+ startup_pirq(data);
}
-static void disable_pirq(unsigned int irq)
+static void disable_pirq(struct irq_data *data)
{
}
-static void ack_pirq(unsigned int irq)
+static void ack_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
- move_native_irq(irq);
+ move_native_irq(data->irq);
if (VALID_EVTCHN(evtchn)) {
mask_evtchn(evtchn);
@@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq)
}
}
-static void end_pirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (WARN_ON(!desc))
- return;
-
- if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
- (IRQ_DISABLED|IRQ_PENDING)) {
- shutdown_pirq(irq);
- } else if (VALID_EVTCHN(evtchn)) {
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq);
- }
-}
-
static int find_irq_by_gsi(unsigned gsi)
{
int irq;
@@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
goto out; /* XXX need refcount? */
}
- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
- * we are using the !xen_initial_domain() to drop in the function.*/
- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
- xen_pv_domain())) {
- irq = gsi;
- irq_alloc_desc_at(irq, -1);
- } else
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_gsi(gsi);
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq, name);
@@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
* this in the priv domain. */
if (xen_initial_domain() &&
HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- irq_free_desc(irq);
+ xen_free_irq(irq);
irq = -ENOSPC;
goto out;
}
@@ -674,87 +644,46 @@ out:
}
#ifdef CONFIG_PCI_MSI
-#include <linux/msi.h>
-#include "../pci/msi.h"
-
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
- spin_lock(&irq_mapping_update_lock);
-
- if (alloc & XEN_ALLOC_IRQ) {
- *irq = find_unbound_irq();
- if (*irq == -1)
- goto out;
- }
-
- if (alloc & XEN_ALLOC_PIRQ) {
- *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
- if (*pirq == -1)
- goto out;
- }
+ int rc;
+ struct physdev_get_free_pirq op_get_free_pirq;
- set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
- handle_level_irq, name);
+ op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
- pirq_to_irq[*pirq] = *irq;
+ WARN_ONCE(rc == -ENOSYS,
+ "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
-out:
- spin_unlock(&irq_mapping_update_lock);
+ return rc ? -1 : op_get_free_pirq.pirq;
}
-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int vector, const char *name)
{
- int irq = -1;
- struct physdev_map_pirq map_irq;
- int rc;
- int pos;
- u32 table_offset, bir;
-
- memset(&map_irq, 0, sizeof(map_irq));
- map_irq.domid = DOMID_SELF;
- map_irq.type = MAP_PIRQ_TYPE_MSI;
- map_irq.index = -1;
- map_irq.pirq = -1;
- map_irq.bus = dev->bus->number;
- map_irq.devfn = dev->devfn;
-
- if (type == PCI_CAP_ID_MSIX) {
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-
- pci_read_config_dword(dev, msix_table_offset_reg(pos),
- &table_offset);
- bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
-
- map_irq.table_base = pci_resource_start(dev, bir);
- map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
- }
+ int irq, ret;
spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq();
-
+ irq = xen_allocate_irq_dynamic();
if (irq == -1)
goto out;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
- if (rc) {
- printk(KERN_WARNING "xen map irq failed %d\n", rc);
-
- irq_free_desc(irq);
-
- irq = -1;
- goto out;
- }
- irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
-
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- handle_level_irq,
- (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
+ handle_level_irq, name);
+ irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
+ pirq_to_irq[pirq] = irq;
+ ret = irq_set_msi_desc(irq, msidesc);
+ if (ret < 0)
+ goto error_irq;
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
+error_irq:
+ spin_unlock(&irq_mapping_update_lock);
+ xen_free_irq(irq);
+ return -1;
}
#endif
@@ -779,11 +708,12 @@ int xen_destroy_irq(int irq)
printk(KERN_WARNING "unmap irq failed %d\n", rc);
goto out;
}
- pirq_to_irq[info->u.pirq.pirq] = -1;
}
+ pirq_to_irq[info->u.pirq.pirq] = -1;
+
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
out:
spin_unlock(&irq_mapping_update_lock);
@@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq = evtchn_to_irq[evtchn];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_fasteoi_irq, "event");
@@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = per_cpu(ipi_to_irq, cpu)[ipi];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
if (irq < 0)
goto out;
@@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
@@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq)
if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
}
spin_unlock(&irq_mapping_update_lock);
@@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
if (irq < 0)
return irq;
- irqflags |= IRQF_NO_SUSPEND;
+ irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
@@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
return 0;
}
-static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
+ bool force)
{
unsigned tcpu = cpumask_first(dest);
- return rebind_irq_to_cpu(irq, tcpu);
+ return rebind_irq_to_cpu(data->irq, tcpu);
}
int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq)
return 1;
}
-static void enable_dynirq(unsigned int irq)
+static void enable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static void disable_dynirq(unsigned int irq)
+static void disable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
-static void ack_dynirq(unsigned int irq)
+static void ack_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
- move_masked_irq(irq);
+ move_masked_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static int retrigger_dynirq(unsigned int irq)
+static int retrigger_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
struct shared_info *sh = HYPERVISOR_shared_info;
int ret = 0;
@@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void)
printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
- startup_pirq(irq);
+ __startup_pirq(irq);
}
}
@@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq)
void xen_irq_resume(void)
{
unsigned int cpu, irq, evtchn;
- struct irq_desc *desc;
init_evtchn_cpu_bindings();
@@ -1465,66 +1395,48 @@ void xen_irq_resume(void)
restore_cpu_ipis(cpu);
}
- /*
- * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
- * are not handled by the IRQ core.
- */
- for_each_irq_desc(irq, desc) {
- if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
- continue;
- if (desc->status & IRQ_DISABLED)
- continue;
-
- evtchn = evtchn_from_irq(irq);
- if (evtchn == -1)
- continue;
-
- unmask_evtchn(evtchn);
- }
-
restore_cpu_pirqs();
}
static struct irq_chip xen_dynamic_chip __read_mostly = {
- .name = "xen-dyn",
+ .name = "xen-dyn",
- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
- .eoi = ack_dynirq,
- .set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
+ .irq_eoi = ack_dynirq,
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_pirq_chip __read_mostly = {
- .name = "xen-pirq",
+ .name = "xen-pirq",
- .startup = startup_pirq,
- .shutdown = shutdown_pirq,
+ .irq_startup = startup_pirq,
+ .irq_shutdown = shutdown_pirq,
- .enable = enable_pirq,
- .unmask = enable_pirq,
+ .irq_enable = enable_pirq,
+ .irq_unmask = enable_pirq,
- .disable = disable_pirq,
- .mask = disable_pirq,
+ .irq_disable = disable_pirq,
+ .irq_mask = disable_pirq,
- .ack = ack_pirq,
- .end = end_pirq,
+ .irq_ack = ack_pirq,
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
+ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
- .name = "xen-percpu",
+ .name = "xen-percpu",
- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
- .ack = ack_dynirq,
+ .irq_ack = ack_dynirq,
};
int xen_set_callback_via(uint64_t via)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 24177272bcb..ebb292859b5 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,42 +34,38 @@ enum shutdown_state {
/* Ignore multiple shutdown requests. */
static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
-#ifdef CONFIG_PM_SLEEP
-static int xen_hvm_suspend(void *data)
-{
- int err;
- struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
- int *cancelled = data;
-
- BUG_ON(!irqs_disabled());
-
- err = sysdev_suspend(PMSG_SUSPEND);
- if (err) {
- printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n",
- err);
- return err;
- }
-
- *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+struct suspend_info {
+ int cancelled;
+ unsigned long arg; /* extra hypercall argument */
+ void (*pre)(void);
+ void (*post)(int cancelled);
+};
- xen_hvm_post_suspend(*cancelled);
+static void xen_hvm_post_suspend(int cancelled)
+{
+ xen_arch_hvm_post_suspend(cancelled);
gnttab_resume();
+}
- if (!*cancelled) {
- xen_irq_resume();
- xen_console_resume();
- xen_timer_resume();
- }
-
- sysdev_resume();
+static void xen_pre_suspend(void)
+{
+ xen_mm_pin_all();
+ gnttab_suspend();
+ xen_arch_pre_suspend();
+}
- return 0;
+static void xen_post_suspend(int cancelled)
+{
+ xen_arch_post_suspend(cancelled);
+ gnttab_resume();
+ xen_mm_unpin_all();
}
+#ifdef CONFIG_PM_SLEEP
static int xen_suspend(void *data)
{
+ struct suspend_info *si = data;
int err;
- int *cancelled = data;
BUG_ON(!irqs_disabled());
@@ -80,22 +76,20 @@ static int xen_suspend(void *data)
return err;
}
- xen_mm_pin_all();
- gnttab_suspend();
- xen_pre_suspend();
+ if (si->pre)
+ si->pre();
/*
* This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain.
*/
- *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+ si->cancelled = HYPERVISOR_suspend(si->arg);
- xen_post_suspend(*cancelled);
- gnttab_resume();
- xen_mm_unpin_all();
+ if (si->post)
+ si->post(si->cancelled);
- if (!*cancelled) {
+ if (!si->cancelled) {
xen_irq_resume();
xen_console_resume();
xen_timer_resume();
@@ -109,7 +103,7 @@ static int xen_suspend(void *data)
static void do_suspend(void)
{
int err;
- int cancelled = 1;
+ struct suspend_info si;
shutting_down = SHUTDOWN_SUSPEND;
@@ -139,20 +133,29 @@ static void do_suspend(void)
goto out_resume;
}
- if (xen_hvm_domain())
- err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
- else
- err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+ si.cancelled = 1;
+
+ if (xen_hvm_domain()) {
+ si.arg = 0UL;
+ si.pre = NULL;
+ si.post = &xen_hvm_post_suspend;
+ } else {
+ si.arg = virt_to_mfn(xen_start_info);
+ si.pre = &xen_pre_suspend;
+ si.post = &xen_post_suspend;
+ }
+
+ err = stop_machine(xen_suspend, &si, cpumask_of(0));
dpm_resume_noirq(PMSG_RESUME);
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
- cancelled = 1;
+ si.cancelled = 1;
}
out_resume:
- if (!cancelled) {
+ if (!si.cancelled) {
xen_arch_resume();
xs_resume();
} else
@@ -172,12 +175,39 @@ out:
}
#endif /* CONFIG_PM_SLEEP */
+struct shutdown_handler {
+ const char *command;
+ void (*cb)(void);
+};
+
+static void do_poweroff(void)
+{
+ shutting_down = SHUTDOWN_POWEROFF;
+ orderly_poweroff(false);
+}
+
+static void do_reboot(void)
+{
+ shutting_down = SHUTDOWN_POWEROFF; /* ? */
+ ctrl_alt_del();
+}
+
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
char *str;
struct xenbus_transaction xbt;
int err;
+ static struct shutdown_handler handlers[] = {
+ { "poweroff", do_poweroff },
+ { "halt", do_poweroff },
+ { "reboot", do_reboot },
+#ifdef CONFIG_PM_SLEEP
+ { "suspend", do_suspend },
+#endif
+ {NULL, NULL},
+ };
+ static struct shutdown_handler *handler;
if (shutting_down != SHUTDOWN_INVALID)
return;
@@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
return;
}
- xenbus_write(xbt, "control", "shutdown", "");
+ for (handler = &handlers[0]; handler->command; handler++) {
+ if (strcmp(str, handler->command) == 0)
+ break;
+ }
+
+ /* Only acknowledge commands which we are prepared to handle. */
+ if (handler->cb)
+ xenbus_write(xbt, "control", "shutdown", "");
err = xenbus_transaction_end(xbt, 0);
if (err == -EAGAIN) {
@@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
goto again;
}
- if (strcmp(str, "poweroff") == 0 ||
- strcmp(str, "halt") == 0) {
- shutting_down = SHUTDOWN_POWEROFF;
- orderly_poweroff(false);
- } else if (strcmp(str, "reboot") == 0) {
- shutting_down = SHUTDOWN_POWEROFF; /* ? */
- ctrl_alt_del();
-#ifdef CONFIG_PM_SLEEP
- } else if (strcmp(str, "suspend") == 0) {
- do_suspend();
-#endif
+ if (handler->cb) {
+ handler->cb();
} else {
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
@@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
return NOTIFY_DONE;
}
-static int __init __setup_shutdown_event(void)
-{
- /* Delay initialization in the PV on HVM case */
- if (xen_hvm_domain())
- return 0;
-
- if (!xen_pv_domain())
- return -ENODEV;
-
- return xen_setup_shutdown_event();
-}
-
int xen_setup_shutdown_event(void)
{
static struct notifier_block xenstore_notifier = {
.notifier_call = shutdown_event
};
+
+ if (!xen_domain())
+ return -ENODEV;
register_xenstore_notifier(&xenstore_notifier);
return 0;
}
EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
-subsys_initcall(__setup_shutdown_event);
+subsys_initcall(xen_setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041f42c..319dd0a94d5 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
if (ret)
goto out;
xenbus_probe(NULL);
- ret = xen_setup_shutdown_event();
- if (ret)
- goto out;
return 0;
out:
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57ed..7cb53aafac1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
def_bool n
config EXPORTFS
- tristate
+ bool
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c..ba01202844c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_FHANDLE) += fhandle.o
+
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa23df..7f78cc78fdd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b..b4ffad859ad 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff984987..7b3089b5c2d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved) {
- loops = 0;
+ if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
- } else {
- loops++;
- }
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- time_left = schedule_timeout(pause);
+ time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e..714adc4ac4c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- off = extent_map_end(em);
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
+
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
+
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
- em_start = em->start;
- em_len = em->len;
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
em_end = extent_map_end(em);
+ em_len = em_end - em_start;
emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d594..f447b783bb8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -763,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -794,15 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
- int c;
- for (c = i - 1; c >= 0; c--) {
- unlock_page(pages[c]);
- page_cache_release(pages[c]);
- }
- return -ENOMEM;
+ faili = i - 1;
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c..4a0107e1874 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4806,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
int err;
int drop_inode = 0;
- if (inode->i_nlink == 0)
- return -ENOENT;
-
/* do not allow sys_link's with other subvols of the same device */
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EPERM;
@@ -4821,10 +4818,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -6056,6 +6054,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 099a58615b9..ebafa65a29b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6c..c6d31a3bab8 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs(buf, &tmp);
- path_put(&path);
- }
+ struct kstatfs tmp;
+ int error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
{
- struct file * file;
struct kstatfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ int error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct path path;
+ struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs64(buf, &tmp);
- path_put(&path);
- }
+ error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs64(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct file * file;
struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = compat_readv(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
}
#endif /* CONFIG_TIMERFD */
+
+#ifdef CONFIG_FHANDLE
+/*
+ * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
+ * doesn't set the O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle, int flags)
+{
+ return do_handle_open(mountdirfd, handle, flags);
+}
+#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae9..a39fe47c466 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1181,24 +1200,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
+}
+
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&inode->i_lock);
- res = __d_find_alias(inode, 0);
+ res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..ba99e1abb1a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
struct file *file;
char *tmp = getname(library);
int error = PTR_ERR(tmp);
+ static const struct open_flags uselib_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_READ | MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
{
struct file *file;
int err;
+ static const struct open_flags open_exec_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
- file = do_filp_open(AT_FDCWD, name,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
if (IS_ERR(file))
goto out;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd..b05acb79613 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
struct inode * inode = dentry->d_inode;
int len = *max_len;
int type = FILEID_INO32_GEN;
-
- if (len < 2 || (connectable && len < 4))
+
+ if (connectable && (len < 4)) {
+ *max_len = 4;
+ return 255;
+ } else if (len < 2) {
+ *max_len = 2;
return 255;
+ }
len = 2;
fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
/*
* Try to get any dentry for the given file handle from the filesystem.
*/
+ if (!nop || !nop->fh_to_dentry)
+ return ERR_PTR(-ESTALE);
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
if (!result)
result = ERR_PTR(-ESTALE);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810e..561f6925626 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f247..9cc19a1dea8 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext3_qctl_operations;
sb->dq_op = &ext3_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c..e781b7ea563 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b..5977b356a43 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd..0e277ec4b61 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
struct inode *inode = de->d_inode;
u32 ipos_h, ipos_m, ipos_l;
- if (len < 5)
+ if (len < 5) {
+ *lenp = 5;
return 255; /* no room */
+ }
ipos_h = MSDOS_I(inode)->i_pos >> 8;
ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd..adae3fb7451 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bd..6c82e5bac03 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
int ret = -EBADF;
- struct file *file = fget(fildes);
+ struct file *file = fget_raw(fildes);
if (file) {
ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
return err;
}
+static int check_fcntl_cmd(unsigned cmd)
+{
+ switch (cmd) {
+ case F_DUPFD:
+ case F_DUPFD_CLOEXEC:
+ case F_GETFD:
+ case F_SETFD:
+ case F_GETFL:
+ return 1;
+ }
+ return 0;
+}
+
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
struct file *filp;
long err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
long err;
err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC
+ __FMODE_EXEC | O_PATH
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 00000000000..bf93ad2bee0
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static long do_sys_name_to_handle(struct path *path,
+ struct file_handle __user *ufh,
+ int __user *mnt_id)
+{
+ long retval;
+ struct file_handle f_handle;
+ int handle_dwords, handle_bytes;
+ struct file_handle *handle = NULL;
+
+ /*
+ * We need t make sure wether the file system
+ * support decoding of the file handle
+ */
+ if (!path->mnt->mnt_sb->s_export_op ||
+ !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
+ return -EFAULT;
+
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ /* convert handle size to multiple of sizeof(u32) */
+ handle_dwords = f_handle.handle_bytes >> 2;
+
+ /* we ask for a non connected handle */
+ retval = exportfs_encode_fh(path->dentry,
+ (struct fid *)handle->f_handle,
+ &handle_dwords, 0);
+ handle->handle_type = retval;
+ /* convert handle size to bytes */
+ handle_bytes = handle_dwords * sizeof(u32);
+ handle->handle_bytes = handle_bytes;
+ if ((handle->handle_bytes > f_handle.handle_bytes) ||
+ (retval == 255) || (retval == -ENOSPC)) {
+ /* As per old exportfs_encode_fh documentation
+ * we could return ENOSPC to indicate overflow
+ * But file system returned 255 always. So handle
+ * both the values
+ */
+ /*
+ * set the handle size to zero so we copy only
+ * non variable part of the file_handle
+ */
+ handle_bytes = 0;
+ retval = -EOVERFLOW;
+ } else
+ retval = 0;
+ /* copy the mount id */
+ if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+ copy_to_user(ufh, handle,
+ sizeof(struct file_handle) + handle_bytes))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+}
+
+/**
+ * sys_name_to_handle_at: convert name to handle
+ * @dfd: directory relative to which name is interpreted if not absolute
+ * @name: name that should be converted to handle.
+ * @handle: resulting file handle
+ * @mnt_id: mount id of the file system containing the file
+ * @flag: flag value to indicate whether to follow symlink or not
+ *
+ * @handle->handle_size indicate the space available to store the
+ * variable part of the file handle in bytes. If there is not
+ * enough space, the field is updated to return the minimum
+ * value required.
+ */
+SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
+ struct file_handle __user *, handle, int __user *, mnt_id,
+ int, flag)
+{
+ struct path path;
+ int lookup_flags;
+ int err;
+
+ if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+
+ lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ err = user_path_at(dfd, name, lookup_flags, &path);
+ if (!err) {
+ err = do_sys_name_to_handle(&path, handle, mnt_id);
+ path_put(&path);
+ }
+ return err;
+}
+
+static struct vfsmount *get_vfsmount_from_fd(int fd)
+{
+ struct path path;
+
+ if (fd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ spin_lock(&fs->lock);
+ path = fs->pwd;
+ mntget(path.mnt);
+ spin_unlock(&fs->lock);
+ } else {
+ int fput_needed;
+ struct file *file = fget_light(fd, &fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+ path = file->f_path;
+ mntget(path.mnt);
+ fput_light(file, fput_needed);
+ }
+ return path.mnt;
+}
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+ return 1;
+}
+
+static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
+ struct path *path)
+{
+ int retval = 0;
+ int handle_dwords;
+
+ path->mnt = get_vfsmount_from_fd(mountdirfd);
+ if (IS_ERR(path->mnt)) {
+ retval = PTR_ERR(path->mnt);
+ goto out_err;
+ }
+ /* change the handle size to multiple of sizeof(u32) */
+ handle_dwords = handle->handle_bytes >> 2;
+ path->dentry = exportfs_decode_fh(path->mnt,
+ (struct fid *)handle->f_handle,
+ handle_dwords, handle->handle_type,
+ vfs_dentry_acceptable, NULL);
+ if (IS_ERR(path->dentry)) {
+ retval = PTR_ERR(path->dentry);
+ goto out_mnt;
+ }
+ return 0;
+out_mnt:
+ mntput(path->mnt);
+out_err:
+ return retval;
+}
+
+static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ struct path *path)
+{
+ int retval = 0;
+ struct file_handle f_handle;
+ struct file_handle *handle = NULL;
+
+ /*
+ * With handle we don't look at the execute bit on the
+ * the directory. Ideally we would like CAP_DAC_SEARCH.
+ * But we don't have that
+ */
+ if (!capable(CAP_DAC_READ_SEARCH)) {
+ retval = -EPERM;
+ goto out_err;
+ }
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+ retval = -EFAULT;
+ goto out_err;
+ }
+ if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
+ (f_handle.handle_bytes == 0)) {
+ retval = -EINVAL;
+ goto out_err;
+ }
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+ goto out_err;
+ }
+ /* copy the full handle */
+ if (copy_from_user(handle, ufh,
+ sizeof(struct file_handle) +
+ f_handle.handle_bytes)) {
+ retval = -EFAULT;
+ goto out_handle;
+ }
+
+ retval = do_handle_to_path(mountdirfd, handle, path);
+
+out_handle:
+ kfree(handle);
+out_err:
+ return retval;
+}
+
+long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag)
+{
+ long retval = 0;
+ struct path path;
+ struct file *file;
+ int fd;
+
+ retval = handle_to_path(mountdirfd, ufh, &path);
+ if (retval)
+ return retval;
+
+ fd = get_unused_fd_flags(open_flag);
+ if (fd < 0) {
+ path_put(&path);
+ return fd;
+ }
+ file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ retval = PTR_ERR(file);
+ } else {
+ retval = fd;
+ fsnotify_open(file);
+ fd_install(fd, file);
+ }
+ path_put(&path);
+ return retval;
+}
+
+/**
+ * sys_open_by_handle_at: Open the file handle
+ * @mountdirfd: directory file descriptor
+ * @handle: file handle to be opened
+ * @flag: open flags.
+ *
+ * @mountdirfd indicate the directory file descriptor
+ * of the mount point. file handle is decoded relative
+ * to the vfsmount pointed by the @mountdirfd. @flags
+ * value is same as the open(2) flags.
+ */
+SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+ struct file_handle __user *, handle,
+ int, flags)
+{
+ long ret;
+
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+
+ ret = do_handle_open(mountdirfd, handle, flags);
+ return ret;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e2..74a9544ac77 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (!atomic_long_inc_not_zero(&file->f_count)) {
- /* File object ref couldn't be taken */
- rcu_read_unlock();
- return NULL;
- }
+ /* File object ref couldn't be taken */
+ if (file->f_mode & FMODE_PATH ||
+ !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
}
rcu_read_unlock();
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
EXPORT_SYMBOL(fget);
+struct file *fget_raw(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
*fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
+ if (file && (file->f_mode & FMODE_PATH))
+ file = NULL;
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (!(file->f_mode & FMODE_PATH) &&
+ atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
} else {
rcu_read_lock();
file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83543b5ff94..8bd0ef9286c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd..051b1a08452 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
u64 nodeid;
u32 generation;
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
nodeid = get_fuse_inode(inode)->nodeid;
generation = inode->i_generation;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b87..0da8da2c991 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
int error;
int had_lock = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f..b5a5e60df0d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
- if (*len < GFS2_SMALL_FH_SIZE ||
- (connectable && *len < GFS2_LARGE_FH_SIZE))
+ if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+ *len = GFS2_LARGE_FH_SIZE;
return 255;
+ } else if (*len < GFS2_SMALL_FH_SIZE) {
+ *len = GFS2_SMALL_FH_SIZE;
+ return 255;
+ }
fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7f..f3d15de44b1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct open_flags {
+ int open_flag;
+ int mode;
+ int acc_mode;
+ int intent;
+};
+extern struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int lookup_flags);
+extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
+ const char *, const struct open_flags *, int lookup_flags);
+
+extern long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag);
/*
* inode.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb3847..dd4687ff30d 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
* offset of the inode and the upper 16 bits of fh32[1] to
* hold the offset of the parent.
*/
-
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *max_len = 5;
+ return 255;
+ } else if (len < 3) {
+ *max_len = 3;
return 255;
+ }
len = 3;
fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb..3f04a180493 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
if (ip->i_nlink == JFS_LINK_MAX)
return -EMLINK;
- if (ip->i_nlink == 0)
- return -ENOENT;
-
dquot_initialize(dir);
tid = txBegin(ip->i_sb, 0);
@@ -1600,7 +1597,7 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6..0a601cae23d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-char * getname(const char __user * filename)
+static char *getname_flags(const char __user * filename, int flags)
{
char *tmp, *result;
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
result = tmp;
if (retval < 0) {
- __putname(tmp);
- result = ERR_PTR(retval);
+ if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
+ __putname(tmp);
+ result = ERR_PTR(retval);
+ }
}
}
audit_getname(result);
return result;
}
+char *getname(const char __user * filename)
+{
+ return getname_flags(filename, 0);
+}
+
#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
struct dentry *dentry = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
goto err;
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
err:
spin_unlock(&dentry->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
parent->d_count++;
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -490,7 +501,7 @@ err:
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -498,8 +509,16 @@ err_root:
/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
{
- if (nd->flags & LOOKUP_RCU)
- return nameidata_dentry_drop_rcu(nd, dentry);
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+ }
+ }
return 0;
}
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
spin_lock(&dentry->d_lock);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
return -ECHILD;
}
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
-{
- if (likely(nd->flags & LOOKUP_RCU))
- return nameidata_drop_rcu_last(nd);
- return 0;
-}
-
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
return dentry;
}
-static inline struct dentry *
-do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
-{
- int status = d_revalidate(dentry, nd);
- if (likely(status > 0))
- return dentry;
- if (status == -ECHILD) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- return do_revalidate(dentry, nd);
- }
- if (status < 0)
- return ERR_PTR(status);
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- return dentry;
-}
-
-static inline int need_reval_dot(struct dentry *dentry)
-{
- if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
- return 0;
-
- if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
- return 0;
-
- return 1;
-}
-
/*
- * force_reval_path - force revalidation of a dentry
+ * handle_reval_path - force revalidation of a dentry
*
* In some situations the path walking code will trust dentries without
* revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
* invalidate the dentry. It's up to the caller to handle putting references
* to the path if necessary.
*/
-static int
-force_reval_path(struct path *path, struct nameidata *nd)
+static inline int handle_reval_path(struct nameidata *nd)
{
+ struct dentry *dentry = nd->path.dentry;
int status;
- struct dentry *dentry = path->dentry;
- /*
- * only check on filesystems where it's possible for the dentry to
- * become stale.
- */
- if (!need_reval_dot(dentry))
+ if (likely(!(nd->flags & LOOKUP_JUMPED)))
+ return 0;
+
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
return 0;
+ /* Note: we do not d_invalidate() */
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
- if (!status) {
- d_invalidate(dentry);
+ if (!status)
status = -ESTALE;
- }
+
return status;
}
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->root);
+ nd->flags |= LOOKUP_JUMPED;
}
nd->inode = nd->path.dentry->d_inode;
@@ -757,20 +737,44 @@ static inline void path_to_nameidata(const struct path *path,
nd->path.dentry = path->dentry;
}
+static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
+{
+ struct inode *inode = link->dentry->d_inode;
+ if (!IS_ERR(cookie) && inode->i_op->put_link)
+ inode->i_op->put_link(link->dentry, nd, cookie);
+ path_put(link);
+}
+
static __always_inline int
-__do_follow_link(const struct path *link, struct nameidata *nd, void **p)
+follow_link(struct path *link, struct nameidata *nd, void **p)
{
int error;
struct dentry *dentry = link->dentry;
BUG_ON(nd->flags & LOOKUP_RCU);
+ if (unlikely(current->total_link_count >= 40)) {
+ *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
+ path_put_conditional(link, nd);
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+ cond_resched();
+ current->total_link_count++;
+
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
if (link->mnt == nd->path.mnt)
mntget(link->mnt);
+ error = security_inode_follow_link(link->dentry, nd);
+ if (error) {
+ *p = ERR_PTR(error); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return error;
+ }
+
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
@@ -780,56 +784,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
if (s)
error = __vfs_follow_link(nd, s);
else if (nd->last_type == LAST_BIND) {
- error = force_reval_path(&nd->path, nd);
- if (error)
+ nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
+ if (nd->inode->i_op->follow_link) {
+ /* stepped on a _really_ weird one */
path_put(&nd->path);
+ error = -ELOOP;
+ }
}
}
return error;
}
-/*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
-{
- void *cookie;
- int err = -ELOOP;
-
- /* We drop rcu-walk here */
- if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
- return -ECHILD;
- BUG_ON(inode != path->dentry->d_inode);
-
- if (current->link_count >= MAX_NESTED_LINKS)
- goto loop;
- if (current->total_link_count >= 40)
- goto loop;
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
- cond_resched();
- err = security_inode_follow_link(path->dentry, nd);
- if (err)
- goto loop;
- current->link_count++;
- current->total_link_count++;
- nd->depth++;
- err = __do_follow_link(path, nd, &cookie);
- if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
- path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
- path_put(path);
- current->link_count--;
- nd->depth--;
- return err;
-loop:
- path_put_conditional(path, nd);
- path_put(&nd->path);
- return err;
-}
-
static int follow_up_rcu(struct path *path)
{
struct vfsmount *parent;
@@ -1068,7 +1034,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
seq = read_seqcount_begin(&parent->d_seq);
if (read_seqcount_retry(&old->d_seq, nd->seq))
- return -ECHILD;
+ goto failed;
inode = parent->d_inode;
nd->path.dentry = parent;
nd->seq = seq;
@@ -1081,8 +1047,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
}
__follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
-
return 0;
+
+failed:
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
}
/*
@@ -1216,68 +1189,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
- struct inode *dir;
+ int need_reval = 1;
+ int status = 1;
int err;
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- err = parent->d_op->d_hash(parent, nd->inode, name);
- if (err < 0)
- return err;
- }
-
- /*
* Rename seqlock is not required here because in the off chance
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
-
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
- if (!dentry) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto need_lookup;
- }
+ if (!dentry)
+ goto unlazy;
+
/* Memory barrier in read_seqcount_begin of child is enough */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
-
nd->seq = seq;
+
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate_rcu(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
- if (!(nd->flags & LOOKUP_RCU))
- goto done;
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status != -ECHILD)
+ need_reval = 0;
+ goto unlazy;
+ }
}
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, false)))
return 0;
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- /* fallthru */
+unlazy:
+ if (dentry) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return -ECHILD;
+ } else {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ } else {
+ dentry = __d_lookup(parent, name);
}
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
-found:
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
+
+retry:
+ if (unlikely(!dentry)) {
+ struct inode *dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
+
+ mutex_lock(&dir->i_mutex);
+ dentry = d_lookup(parent, name);
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->i_mutex);
+ return PTR_ERR(dentry);
+ }
+ /* known good */
+ need_reval = 0;
+ status = 1;
+ }
+ mutex_unlock(&dir->i_mutex);
}
-done:
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status < 0) {
+ dput(dentry);
+ return status;
+ }
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ need_reval = 1;
+ goto retry;
+ }
+ }
+
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
@@ -1287,39 +1277,113 @@ done:
}
*inode = path->dentry->d_inode;
return 0;
+}
-need_lookup:
- dir = parent->d_inode;
- BUG_ON(nd->inode != dir);
+static inline int may_lookup(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err != -ECHILD)
+ return err;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ return exec_permission(nd->inode, 0);
+}
- mutex_lock(&dir->i_mutex);
- /*
- * First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore, or the first
- * lookup failed due to an unrelated rename.
- *
- * This could use version numbering or similar to avoid unnecessary
- * cache lookups, but then we'd have to do the first lookup in the
- * non-racy way. However in the common case here, everything should
- * be hot in cache, so would it be a big win?
- */
- dentry = d_lookup(parent, name);
- if (likely(!dentry)) {
- dentry = d_alloc_and_lookup(parent, name, nd);
- mutex_unlock(&dir->i_mutex);
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+static inline int handle_dots(struct nameidata *nd, int type)
+{
+ if (type == LAST_DOTDOT) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
+ }
+ return 0;
+}
+
+static void terminate_walk(struct nameidata *nd)
+{
+ if (!(nd->flags & LOOKUP_RCU)) {
+ path_put(&nd->path);
+ } else {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
}
+}
+
+static inline int walk_component(struct nameidata *nd, struct path *path,
+ struct qstr *name, int type, int follow)
+{
+ struct inode *inode;
+ int err;
/*
- * Uhhuh! Nasty case: the cache was re-populated while
- * we waited on the semaphore. Need to revalidate.
+ * "." and ".." are special - ".." especially so because it has
+ * to be able to know about the current root directory and
+ * parent relationships.
*/
- mutex_unlock(&dir->i_mutex);
- goto found;
+ if (unlikely(type != LAST_NORM))
+ return handle_dots(nd, type);
+ err = do_lookup(nd, name, path, &inode);
+ if (unlikely(err)) {
+ terminate_walk(nd);
+ return err;
+ }
+ if (!inode) {
+ path_to_nameidata(path, nd);
+ terminate_walk(nd);
+ return -ENOENT;
+ }
+ if (unlikely(inode->i_op->follow_link) && follow) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+ return -ECHILD;
+ BUG_ON(inode != path->dentry->d_inode);
+ return 1;
+ }
+ path_to_nameidata(path, nd);
+ nd->inode = inode;
+ return 0;
+}
-fail:
- return PTR_ERR(dentry);
+/*
+ * This limits recursive symlink follows to 8, while
+ * limiting consecutive symlinks to 40.
+ *
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+static inline int nested_symlink(struct path *path, struct nameidata *nd)
+{
+ int res;
+
+ BUG_ON(nd->depth >= MAX_NESTED_LINKS);
+ if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
+ path_put_conditional(path, nd);
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+
+ nd->depth++;
+ current->link_count++;
+
+ do {
+ struct path link = *path;
+ void *cookie;
+
+ res = follow_link(&link, nd, &cookie);
+ if (!res)
+ res = walk_component(nd, path, &nd->last,
+ nd->last_type, LOOKUP_FOLLOW);
+ put_link(nd, &link, cookie);
+ } while (res > 0);
+
+ current->link_count--;
+ nd->depth--;
+ return res;
}
/*
@@ -1339,30 +1403,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
while (*name=='/')
name++;
if (!*name)
- goto return_reval;
-
- if (nd->depth)
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
+ return 0;
/* At this point we know we have a real path component. */
for(;;) {
- struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
+ int type;
nd->flags |= LOOKUP_CONTINUE;
- if (nd->flags & LOOKUP_RCU) {
- err = exec_permission(nd->inode, IPERM_FLAG_RCU);
- if (err == -ECHILD) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto exec_again;
- }
- } else {
-exec_again:
- err = exec_permission(nd->inode, 0);
- }
+
+ err = may_lookup(nd);
if (err)
break;
@@ -1378,52 +1430,43 @@ exec_again:
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
+ type = LAST_NORM;
+ if (this.name[0] == '.') switch (this.len) {
+ case 2:
+ if (this.name[1] == '.') {
+ type = LAST_DOTDOT;
+ nd->flags |= LOOKUP_JUMPED;
+ }
+ break;
+ case 1:
+ type = LAST_DOT;
+ }
+ if (likely(type == LAST_NORM)) {
+ struct dentry *parent = nd->path.dentry;
+ nd->flags &= ~LOOKUP_JUMPED;
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ err = parent->d_op->d_hash(parent, nd->inode,
+ &this);
+ if (err < 0)
+ break;
+ }
+ }
+
/* remove trailing slashes? */
if (!c)
goto last_component;
while (*++name == '/');
if (!*name)
- goto last_with_slashes;
+ goto last_component;
- /*
- * "." and ".." are special - ".." especially so because it has
- * to be able to know about the current root directory and
- * parent relationships.
- */
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- continue;
- }
- /* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- err = -ENOENT;
- if (!inode)
- goto out_dput;
+ err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
+ if (err < 0)
+ return err;
- if (inode->i_op->follow_link) {
- err = do_follow_link(inode, &next, nd);
+ if (err) {
+ err = nested_symlink(&next, nd);
if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- err = -ENOENT;
- if (!nd->inode)
- break;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
+ return err;
}
err = -ENOTDIR;
if (!nd->inode->i_op->lookup)
@@ -1431,209 +1474,109 @@ exec_again:
continue;
/* here ends the main loop */
-last_with_slashes:
- lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
- if (lookup_flags & LOOKUP_PARENT)
- goto lookup_parent;
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- goto return_reval;
- }
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- if (inode && unlikely(inode->i_op->follow_link) &&
- (lookup_flags & LOOKUP_FOLLOW)) {
- err = do_follow_link(inode, &next, nd);
- if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
- }
- err = -ENOENT;
- if (!nd->inode)
- break;
- if (lookup_flags & LOOKUP_DIRECTORY) {
- err = -ENOTDIR;
- if (!nd->inode->i_op->lookup)
- break;
- }
- goto return_base;
-lookup_parent:
nd->last = this;
- nd->last_type = LAST_NORM;
- if (this.name[0] != '.')
- goto return_base;
- if (this.len == 1)
- nd->last_type = LAST_DOT;
- else if (this.len == 2 && this.name[1] == '.')
- nd->last_type = LAST_DOTDOT;
- else
- goto return_base;
-return_reval:
- /*
- * We bypassed the ordinary revalidation routines.
- * We may need to check the cached dentry for staleness.
- */
- if (need_reval_dot(nd->path.dentry)) {
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
- /* Note: we do not d_invalidate() */
- err = d_revalidate(nd->path.dentry, nd);
- if (!err)
- err = -ESTALE;
- if (err < 0)
- break;
- return 0;
- }
-return_base:
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
+ nd->last_type = type;
return 0;
-out_dput:
- if (!(nd->flags & LOOKUP_RCU))
- path_put_conditional(&next, nd);
- break;
}
- if (!(nd->flags & LOOKUP_RCU))
- path_put(&nd->path);
-return_err:
+ terminate_walk(nd);
return err;
}
-static inline int path_walk_rcu(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static inline int path_walk_simple(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static int path_walk(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- int result;
-
- current->total_link_count = 0;
-
- /* make sure the stuff we saved doesn't go away */
- path_get(&save);
-
- result = link_path_walk(name, nd);
- if (result == -ESTALE) {
- /* nd->path had been dropped */
- current->total_link_count = 0;
- nd->path = save;
- path_get(&nd->path);
- nd->flags |= LOOKUP_REVAL;
- result = link_path_walk(name, nd);
- }
-
- path_put(&save);
-
- return result;
-}
-
-static void path_finish_rcu(struct nameidata *nd)
-{
- if (nd->flags & LOOKUP_RCU) {
- /* RCU dangling. Cancel it. */
- nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- }
- if (nd->file)
- fput(nd->file);
-}
-
-static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags,
+ struct nameidata *nd, struct file **fp)
{
int retval = 0;
int fput_needed;
struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_RCU;
+ nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
+ if (flags & LOOKUP_ROOT) {
+ struct inode *inode = nd->root.dentry->d_inode;
+ if (*name) {
+ if (!inode->i_op->lookup)
+ return -ENOTDIR;
+ retval = inode_permission(inode, MAY_EXEC);
+ if (retval)
+ return retval;
+ }
+ nd->path = nd->root;
+ nd->inode = inode;
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } else {
+ path_get(&nd->path);
+ }
+ return 0;
+ }
+
nd->root.mnt = NULL;
- nd->file = NULL;
if (*name=='/') {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->root = fs->root;
- nd->path = nd->root;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
-
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ set_root_rcu(nd);
+ } else {
+ set_root(nd);
+ path_get(&nd->root);
+ }
+ nd->path = nd->root;
} else if (dfd == AT_FDCWD) {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->path = fs->pwd;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+ } else {
+ get_fs_pwd(current->fs, &nd->path);
+ }
} else {
struct dentry *dentry;
- file = fget_light(dfd, &fput_needed);
+ file = fget_raw_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
dentry = file->f_path.dentry;
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (*name) {
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+ }
nd->path = file->f_path;
- if (fput_needed)
- nd->file = file;
-
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ if (fput_needed)
+ *fp = file;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ } else {
+ path_get(&file->f_path);
+ fput_light(file, fput_needed);
+ }
}
+
nd->inode = nd->path.dentry->d_inode;
return 0;
@@ -1643,60 +1586,23 @@ out_fail:
return retval;
}
-static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static inline int lookup_last(struct nameidata *nd, struct path *path)
{
- int retval = 0;
- int fput_needed;
- struct file *file;
+ if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags;
- nd->depth = 0;
- nd->root.mnt = NULL;
-
- if (*name=='/') {
- set_root(nd);
- nd->path = nd->root;
- path_get(&nd->root);
- } else if (dfd == AT_FDCWD) {
- get_fs_pwd(current->fs, &nd->path);
- } else {
- struct dentry *dentry;
-
- file = fget_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
-
- dentry = file->f_path.dentry;
-
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
-
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
-
- nd->path = file->f_path;
- path_get(&file->f_path);
-
- fput_light(file, fput_needed);
- }
- nd->inode = nd->path.dentry->d_inode;
- return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
+ nd->flags &= ~LOOKUP_PARENT;
+ return walk_component(nd, path, &nd->last, nd->last_type,
+ nd->flags & LOOKUP_FOLLOW);
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
+static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval;
+ struct file *base = NULL;
+ struct path path;
+ int err;
/*
* Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1618,75 @@ static int do_path_lookup(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- retval = path_init_rcu(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk_rcu(name, nd);
- path_finish_rcu(nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
+
+ if (unlikely(err))
+ return err;
+
+ current->total_link_count = 0;
+ err = link_path_walk(name, nd);
+
+ if (!err && !(flags & LOOKUP_PARENT)) {
+ err = lookup_last(nd, &path);
+ while (err > 0) {
+ void *cookie;
+ struct path link = path;
+ nd->flags |= LOOKUP_PARENT;
+ err = follow_link(&link, nd, &cookie);
+ if (!err)
+ err = lookup_last(nd, &path);
+ put_link(nd, &link, cookie);
+ }
}
- if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
- /* slower, locked walk */
- if (retval == -ESTALE)
- flags |= LOOKUP_REVAL;
- retval = path_init(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk(name, nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ if (nd->flags & LOOKUP_RCU) {
+ /* went all way through without dropping RCU */
+ BUG_ON(err);
+ if (nameidata_drop_rcu_last(nd))
+ err = -ECHILD;
+ }
+
+ if (!err)
+ err = handle_reval_path(nd);
+
+ if (!err && nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup) {
+ path_put(&nd->path);
+ return -ENOTDIR;
}
}
+ if (base)
+ fput(base);
+
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ return err;
+}
+
+static int do_path_lookup(int dfd, const char *name,
+ unsigned int flags, struct nameidata *nd)
+{
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+ if (unlikely(retval == -ECHILD))
+ retval = path_lookupat(dfd, name, flags, nd);
+ if (unlikely(retval == -ESTALE))
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+
if (likely(!retval)) {
if (unlikely(!audit_dummy_context())) {
if (nd->path.dentry && nd->inode)
audit_inode(name, nd->path.dentry);
}
}
-
return retval;
}
-int path_lookup(const char *name, unsigned int flags,
- struct nameidata *nd)
+int kern_path_parent(const char *name, struct nameidata *nd)
{
- return do_path_lookup(AT_FDCWD, name, flags, nd);
+ return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
}
int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1773,29 +1710,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct nameidata *nd)
{
- int retval;
-
- /* same as do_path_lookup */
- nd->last_type = LAST_ROOT;
- nd->flags = flags;
- nd->depth = 0;
-
- nd->path.dentry = dentry;
- nd->path.mnt = mnt;
- path_get(&nd->path);
- nd->root = nd->path;
- path_get(&nd->root);
- nd->inode = nd->path.dentry->d_inode;
-
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->inode))
- audit_inode(name, nd->path.dentry);
-
- path_put(&nd->root);
- nd->root.mnt = NULL;
-
- return retval;
+ nd->root.dentry = dentry;
+ nd->root.mnt = mnt;
+ /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
+ return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
}
static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1728,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
return ERR_PTR(err);
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (base->d_flags & DCACHE_OP_HASH) {
- err = base->d_op->d_hash(base, inode, name);
- dentry = ERR_PTR(err);
- if (err < 0)
- goto out;
- }
-
- /*
* Don't bother with __d_lookup: callers are for creat as
* well as unlink, so a lot of the time it would cost
* a double lookup.
@@ -1832,7 +1739,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
if (!dentry)
dentry = d_alloc_and_lookup(base, name, nd);
-out:
+
return dentry;
}
@@ -1846,28 +1753,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
-static int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
-{
- unsigned long hash;
- unsigned int c;
-
- this->name = name;
- this->len = len;
- if (!len)
- return -EACCES;
-
- hash = init_name_hash();
- while (len--) {
- c = *(const unsigned char *)name++;
- if (c == '/' || c == '\0')
- return -EACCES;
- hash = partial_name_hash(c, hash);
- }
- this->hash = end_name_hash(hash);
- return 0;
-}
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
@@ -1881,14 +1766,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
- int err;
struct qstr this;
+ unsigned long hash;
+ unsigned int c;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
- err = __lookup_one_len(name, &this, base, len);
- if (err)
- return ERR_PTR(err);
+ this.name = name;
+ this.len = len;
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ hash = init_name_hash();
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ hash = partial_name_hash(c, hash);
+ }
+ this.hash = end_name_hash(hash);
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, base->d_inode, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
return __lookup_hash(&this, base, NULL);
}
@@ -1897,7 +1802,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
struct nameidata nd;
- char *tmp = getname(name);
+ char *tmp = getname_flags(name, flags);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -2077,12 +1982,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
}
-int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
int error;
+ /* O_PATH? */
+ if (!acc_mode)
+ return 0;
+
if (!inode)
return -ENOENT;
@@ -2151,34 +2060,6 @@ static int handle_truncate(struct file *filp)
}
/*
- * Be careful about ever adding any more callers of this
- * function. Its flags must be in the namei format, not
- * what get passed to sys_open().
- */
-static int __open_namei_create(struct nameidata *nd, struct path *path,
- int open_flag, int mode)
-{
- int error;
- struct dentry *dir = nd->path.dentry;
-
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&nd->path, path->dentry, mode, 0);
- if (error)
- goto out_unlock;
- error = vfs_create(dir->d_inode, path->dentry, mode, nd);
-out_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->path.dentry);
- nd->path.dentry = path->dentry;
-
- if (error)
- return error;
- /* Don't check for write permission, don't truncate */
- return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
-}
-
-/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
@@ -2202,126 +2083,115 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int open_will_truncate(int flag, struct inode *inode)
-{
- /*
- * We'll never write to the fs underlying
- * a device file.
- */
- if (special_file(inode->i_mode))
- return 0;
- return (flag & O_TRUNC);
-}
-
-static struct file *finish_open(struct nameidata *nd,
- int open_flag, int acc_mode)
-{
- struct file *filp;
- int will_truncate;
- int error;
-
- will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
- if (will_truncate) {
- error = mnt_want_write(nd->path.mnt);
- if (error)
- goto exit;
- }
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error) {
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- if (!IS_ERR(filp)) {
- if (will_truncate) {
- error = handle_truncate(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- }
- /*
- * It is now safe to drop the mnt write
- * because the filp has had a write taken
- * on its behalf.
- */
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- return filp;
-
-exit:
- path_put(&nd->path);
- return ERR_PTR(error);
-}
-
/*
- * Handle O_CREAT case for do_filp_open
+ * Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
- int open_flag, int acc_mode,
- int mode, const char *pathname)
+ const struct open_flags *op, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
+ struct dentry *dentry;
+ int open_flag = op->open_flag;
+ int will_truncate = open_flag & O_TRUNC;
+ int want_write = 0;
+ int acc_mode = op->acc_mode;
struct file *filp;
- int error = -EISDIR;
+ int error;
+
+ nd->flags &= ~LOOKUP_PARENT;
+ nd->flags |= op->intent;
switch (nd->last_type) {
case LAST_DOTDOT:
- follow_dotdot(nd);
- dir = nd->path.dentry;
case LAST_DOT:
- if (need_reval_dot(dir)) {
- int status = d_revalidate(nd->path.dentry, nd);
- if (!status)
- status = -ESTALE;
- if (status < 0) {
- error = status;
- goto exit;
- }
- }
+ error = handle_dots(nd, nd->last_type);
+ if (error)
+ return ERR_PTR(error);
/* fallthrough */
case LAST_ROOT:
- goto exit;
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
+ audit_inode(pathname, nd->path.dentry);
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ goto exit;
+ }
+ goto ok;
case LAST_BIND:
+ /* can't be RCU mode here */
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
audit_inode(pathname, dir);
goto ok;
}
+ if (!(open_flag & O_CREAT)) {
+ int symlink_ok = 0;
+ if (nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+ if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
+ symlink_ok = 1;
+ /* we _can_ be in RCU mode here */
+ error = walk_component(nd, path, &nd->last, LAST_NORM,
+ !symlink_ok);
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error) /* symlink */
+ return NULL;
+ /* sayonara */
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ error = -ENOTDIR;
+ if (nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup)
+ goto exit;
+ }
+ audit_inode(pathname, nd->path.dentry);
+ goto ok;
+ }
+
+ /* create side of things */
+
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ audit_inode(pathname, dir);
+ error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto exit;
mutex_lock(&dir->d_inode->i_mutex);
- path->dentry = lookup_hash(nd);
- path->mnt = nd->path.mnt;
-
- error = PTR_ERR(path->dentry);
- if (IS_ERR(path->dentry)) {
+ dentry = lookup_hash(nd);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
goto exit;
}
- if (IS_ERR(nd->intent.open.file)) {
- error = PTR_ERR(nd->intent.open.file);
- goto exit_mutex_unlock;
- }
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
/* Negative dentry, just create the file */
- if (!path->dentry->d_inode) {
+ if (!dentry->d_inode) {
+ int mode = op->mode;
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
/*
* This write is needed to ensure that a
- * ro->rw transition does not occur between
+ * rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2199,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
error = mnt_want_write(nd->path.mnt);
if (error)
goto exit_mutex_unlock;
- error = __open_namei_create(nd, path, open_flag, mode);
- if (error) {
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- return filp;
+ want_write = 1;
+ /* Don't check for write permission, don't truncate */
+ open_flag &= ~O_TRUNC;
+ will_truncate = 0;
+ acc_mode = MAY_OPEN;
+ error = security_path_mknod(&nd->path, dentry, mode, 0);
+ if (error)
+ goto exit_mutex_unlock;
+ error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (error)
+ goto exit_mutex_unlock;
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+ goto common;
}
/*
@@ -2374,7 +2243,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
- filp = finish_open(nd, open_flag, acc_mode);
+ if (!S_ISREG(nd->inode->i_mode))
+ will_truncate = 0;
+
+ if (will_truncate) {
+ error = mnt_want_write(nd->path.mnt);
+ if (error)
+ goto exit;
+ want_write = 1;
+ }
+common:
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto exit;
+ filp = nameidata_to_filp(nd);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, op->acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ if (!IS_ERR(filp)) {
+ if (will_truncate) {
+ error = handle_truncate(filp);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ }
+out:
+ if (want_write)
+ mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit_mutex_unlock:
@@ -2382,197 +2284,103 @@ exit_mutex_unlock:
exit_dput:
path_put_conditional(path, nd);
exit:
- path_put(&nd->path);
- return ERR_PTR(error);
+ filp = ERR_PTR(error);
+ goto out;
}
-/*
- * Note that the low bits of the passed in "open_flag"
- * are not the same as in the local variable "flag". See
- * open_to_namei_flags() for more details.
- */
-struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode)
+static struct file *path_openat(int dfd, const char *pathname,
+ struct nameidata *nd, const struct open_flags *op, int flags)
{
+ struct file *base = NULL;
struct file *filp;
- struct nameidata nd;
- int error;
struct path path;
- int count = 0;
- int flag = open_to_namei_flags(open_flag);
- int flags;
-
- if (!(open_flag & O_CREAT))
- mode = 0;
-
- /* Must never be set by userspace */
- open_flag &= ~FMODE_NONOTIFY;
-
- /*
- * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
- * check for O_DSYNC if the need any syncing at all we enforce it's
- * always set instead of having to deal with possibly weird behaviour
- * for malicious applications setting only __O_SYNC.
- */
- if (open_flag & __O_SYNC)
- open_flag |= O_DSYNC;
-
- if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(open_flag);
-
- /* O_TRUNC implies we need access checks for write permissions */
- if (open_flag & O_TRUNC)
- acc_mode |= MAY_WRITE;
-
- /* Allow the LSM permission hook to distinguish append
- access from general write access. */
- if (open_flag & O_APPEND)
- acc_mode |= MAY_APPEND;
-
- flags = LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- flags |= LOOKUP_FOLLOW;
+ int error;
filp = get_empty_filp();
if (!filp)
return ERR_PTR(-ENFILE);
- filp->f_flags = open_flag;
- nd.intent.open.file = filp;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
-
- if (open_flag & O_CREAT)
- goto creat;
+ filp->f_flags = op->open_flag;
+ nd->intent.open.file = filp;
+ nd->intent.open.flags = open_to_namei_flags(op->open_flag);
+ nd->intent.open.create_mode = op->mode;
- /* !O_CREAT, simple open */
- error = do_path_lookup(dfd, pathname, flags, &nd);
+ error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out_filp;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW)) {
- if (nd.inode->i_op->follow_link)
- goto out_path;
- }
- error = -ENOTDIR;
- if (nd.flags & LOOKUP_DIRECTORY) {
- if (!nd.inode->i_op->lookup)
- goto out_path;
- }
- audit_inode(pathname, nd.path.dentry);
- filp = finish_open(&nd, open_flag, acc_mode);
- release_open_intent(&nd);
- return filp;
-creat:
- /* OK, have to create the file. Find the parent. */
- error = path_init_rcu(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
- error = path_walk_rcu(pathname, &nd);
- path_finish_rcu(&nd);
- if (unlikely(error == -ECHILD || error == -ESTALE)) {
- /* slower, locked walk */
- if (error == -ESTALE) {
-reval:
- flags |= LOOKUP_REVAL;
- }
- error = path_init(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
-
- error = path_walk_simple(pathname, &nd);
- }
+ current->total_link_count = 0;
+ error = link_path_walk(pathname, nd);
if (unlikely(error))
goto out_filp;
- if (unlikely(!audit_dummy_context()))
- audit_inode(pathname, nd.path.dentry);
- /*
- * We have the parent and last component.
- */
- nd.flags = flags;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
+ filp = do_last(nd, &path, op, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path link = path;
- struct inode *linki = link.dentry->d_inode;
void *cookie;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW))
- goto exit_dput;
- if (count++ == 32)
- goto exit_dput;
- /*
- * This is subtle. Instead of calling do_follow_link() we do
- * the thing by hands. The reason is that this way we have zero
- * link_count and path_walk() (called from ->follow_link)
- * honoring LOOKUP_PARENT. After that we have the parent and
- * last component, i.e. we are in the same situation as after
- * the first path_walk(). Well, almost - if the last component
- * is normal we get its copy stored in nd->last.name and we will
- * have to putname() it when we are done. Procfs-like symlinks
- * just set LAST_BIND.
- */
- nd.flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(link.dentry, &nd);
- if (error)
- goto exit_dput;
- error = __do_follow_link(&link, &nd, &cookie);
- if (unlikely(error)) {
- if (!IS_ERR(cookie) && linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- /* nd.path had been dropped */
- nd.path = link;
- goto out_path;
+ if (!(nd->flags & LOOKUP_FOLLOW)) {
+ path_put_conditional(&path, nd);
+ path_put(&nd->path);
+ filp = ERR_PTR(-ELOOP);
+ break;
}
- nd.flags &= ~LOOKUP_PARENT;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- path_put(&link);
+ nd->flags |= LOOKUP_PARENT;
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ error = follow_link(&link, nd, &cookie);
+ if (unlikely(error))
+ filp = ERR_PTR(error);
+ else
+ filp = do_last(nd, &path, op, pathname);
+ put_link(nd, &link, cookie);
}
out:
- if (nd.root.mnt)
- path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
- goto reval;
- release_open_intent(&nd);
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
+ path_put(&nd->root);
+ if (base)
+ fput(base);
+ release_open_intent(nd);
return filp;
-exit_dput:
- path_put_conditional(&path, &nd);
-out_path:
- path_put(&nd.path);
out_filp:
filp = ERR_PTR(error);
goto out;
}
-/**
- * filp_open - open file and return file pointer
- *
- * @filename: path to open
- * @flags: open flags as per the open(2) second argument
- * @mode: mode for the new file if O_CREAT is set, else ignored
- *
- * This is the helper to open a file from kernelspace if you really
- * have to. But in generally you should not do this, so please move
- * along, nothing to see here..
- */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int flags)
+{
+ struct nameidata nd;
+ struct file *filp;
+
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(filp == ERR_PTR(-ECHILD)))
+ filp = path_openat(dfd, pathname, &nd, op, flags);
+ if (unlikely(filp == ERR_PTR(-ESTALE)))
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+ return filp;
+}
+
+struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *name, const struct open_flags *op, int flags)
{
- return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
+ struct nameidata nd;
+ struct file *file;
+
+ nd.root.mnt = mnt;
+ nd.root.dentry = dentry;
+
+ flags |= LOOKUP_ROOT;
+
+ if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ return ERR_PTR(-ELOOP);
+
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(file == ERR_PTR(-ECHILD)))
+ file = path_openat(-1, name, &nd, op, flags);
+ if (unlikely(file == ERR_PTR(-ESTALE)))
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
+ return file;
}
-EXPORT_SYMBOL(filp_open);
/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3111,7 +2919,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
return error;
mutex_lock(&inode->i_mutex);
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ /* Make sure we don't allow creating hardlink to an unlinked file */
+ if (inode->i_nlink == 0)
+ error = -ENOENT;
+ else
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
mutex_unlock(&inode->i_mutex);
if (!error)
fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2945,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
struct dentry *new_dentry;
struct nameidata nd;
struct path old_path;
+ int how = 0;
int error;
char *to;
- if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
+ /*
+ * To use null names we require CAP_DAC_READ_SEARCH
+ * This ensures that not everyone will be able to create
+ * handlink using the passed filedescriptor.
+ */
+ if (flags & AT_EMPTY_PATH) {
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return -ENOENT;
+ how = LOOKUP_EMPTY;
+ }
+
+ if (flags & AT_SYMLINK_FOLLOW)
+ how |= LOOKUP_FOLLOW;
- error = user_path_at(olddfd, oldname,
- flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
- &old_path);
+ error = user_path_at(olddfd, oldname, how, &old_path);
if (error)
return error;
@@ -3578,7 +3402,7 @@ EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(path_lookup);
+EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025d..dffe6f49ab9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int uuid_is_nil(u8 *uuid)
+{
+ int i;
+ u8 *cp = (u8 *)uuid;
+
+ for (i = 0; i < 16; i++) {
+ if (*cp++)
+ return 0;
+ }
+ return 1;
+}
+
static int show_mountinfo(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
if (IS_MNT_UNBINDABLE(mnt))
seq_puts(m, " unbindable");
+ if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
+ /* print the uuid */
+ seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
+
/* Filesystem specific data */
seq_puts(m, " - ");
show_type(m, sb);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb..2f8e61816d7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a747407314..1be36cf65bf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8c..b73c34375f6 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1ff76acc7e9..0a07e353a96 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
#include <linux/sunrpc/bc_xprt.h>
#include <linux/xattr.h>
#include <linux/utsname.h>
-#include <linux/mm.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -257,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -273,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -296,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -436,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -1256,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1272,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1588,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
@@ -3179,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3262,7 +3260,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
spages = pages;
do {
- len = min(PAGE_CACHE_SIZE, buflen);
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
newpage = alloc_page(GFP_KERNEL);
if (newpage == NULL)
@@ -3504,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3520,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3537,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4150,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4174,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4406,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4421,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -5028,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5140,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5227,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5243,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5281,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5349,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04..0592288f9f0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee..94d50e86a12 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a2002..c541093a5bf 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd..6481d537d69 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046c..42b92d7a9cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242dd..124e8fcb0dd 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
static struct file *do_open(char *name, int flags)
{
- struct nameidata nd;
struct vfsmount *mnt;
- int error;
+ struct file *file;
mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
if (IS_ERR(mnt))
return (struct file *)mnt;
- error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd);
- mntput(mnt); /* drop do_kern_mount reference */
- if (error)
- return ERR_PTR(error);
-
- if (flags == O_RDWR)
- error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
- else
- error = may_open(&nd.path, MAY_WRITE, flags);
+ file = file_open_root(mnt->mnt_root, mnt, name, flags);
- if (!error)
- return dentry_open(nd.path.dentry, nd.path.mnt, flags,
- current_cred());
-
- path_put(&nd.path);
- return ERR_PTR(error);
+ mntput(mnt); /* drop do_kern_mount reference */
+ return file;
}
static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f34..02eb4edf0ec 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8..7b566ec14e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
- struct nfs4_delegation *dp = NULL;
+ struct nfs4_delegation *dp;
spin_lock(&recall_lock);
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
- if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
- break;
- }
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+ spin_unlock(&recall_lock);
+ return dp;
+ }
spin_unlock(&recall_lock);
- return dp;
+ return NULL;
}
int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b865507..615f0a9f060 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i;
+ int i, j;
int nr_secflavs;
READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (i = 0; i < dummy; ++i)
+ for (j = 0; j < dummy; ++j)
READ32(dummy);
break;
case RPC_AUTH_GSS:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834..7eb90403fc8 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4f..254652a9b54 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
dentry->d_name.len, dentry->d_name.name,
fh, len, connectable);
- if (len < 3 || (connectable && len < 6)) {
- mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+ if (connectable && (len < 6)) {
+ *max_len = 6;
+ type = 255;
+ goto bail;
+ } else if (len < 3) {
+ *max_len = 3;
type = 255;
goto bail;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19ebc5aad39..29623da133c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
if (IS_ERR(s))
return PTR_ERR(s);
- error = path_lookup(s, LOOKUP_PARENT, nd);
+ error = kern_path_parent(s, nd);
if (error)
putname(s);
else
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b..3cac0bda46d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
+
+ /* It's not possible punch hole on append only file */
+ if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ return -EPERM;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
{
struct path path;
int error = -EINVAL;
- int follow;
+ int lookup_flags;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
goto out;
- follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- error = user_path_at(dfd, filename, follow, &path);
+ lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
goto out;
error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
+ static const struct file_operations empty_fops = {};
struct inode *inode;
int error;
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
+
+ if (unlikely(f->f_flags & O_PATH))
+ f->f_mode = FMODE_PATH;
+
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
- f->f_op = fops_get(inode->i_fop);
file_sb_list_add(f, inode->i_sb);
+ if (unlikely(f->f_mode & FMODE_PATH)) {
+ f->f_op = &empty_fops;
+ return f;
+ }
+
+ f->f_op = fops_get(inode->i_fop);
+
error = security_dentry_open(f, cred);
if (error)
goto cleanup_all;
@@ -882,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);
+static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+{
+ int lookup_flags = 0;
+ int acc_mode;
+
+ if (!(flags & O_CREAT))
+ mode = 0;
+ op->mode = mode;
+
+ /* Must never be set by userspace */
+ flags &= ~FMODE_NONOTIFY;
+
+ /*
+ * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
+ * check for O_DSYNC if the need any syncing at all we enforce it's
+ * always set instead of having to deal with possibly weird behaviour
+ * for malicious applications setting only __O_SYNC.
+ */
+ if (flags & __O_SYNC)
+ flags |= O_DSYNC;
+
+ /*
+ * If we have O_PATH in the open flag. Then we
+ * cannot have anything other than the below set of flags
+ */
+ if (flags & O_PATH) {
+ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+ acc_mode = 0;
+ } else {
+ acc_mode = MAY_OPEN | ACC_MODE(flags);
+ }
+
+ op->open_flag = flags;
+
+ /* O_TRUNC implies we need access checks for write permissions */
+ if (flags & O_TRUNC)
+ acc_mode |= MAY_WRITE;
+
+ /* Allow the LSM permission hook to distinguish append
+ access from general write access. */
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
+
+ op->acc_mode = acc_mode;
+
+ op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
+
+ if (flags & O_CREAT) {
+ op->intent |= LOOKUP_CREATE;
+ if (flags & O_EXCL)
+ op->intent |= LOOKUP_EXCL;
+ }
+
+ if (flags & O_DIRECTORY)
+ lookup_flags |= LOOKUP_DIRECTORY;
+ if (!(flags & O_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ return lookup_flags;
+}
+
+/**
+ * filp_open - open file and return file pointer
+ *
+ * @filename: path to open
+ * @flags: open flags as per the open(2) second argument
+ * @mode: mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to. But in generally you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
+ return do_filp_open(AT_FDCWD, filename, &op, lookup);
+}
+EXPORT_SYMBOL(filp_open);
+
+struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *filename, int flags)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, 0, &op);
+ if (flags & O_CREAT)
+ return ERR_PTR(-EINVAL);
+ if (!filename && (flags & O_DIRECTORY))
+ if (!dentry->d_inode->i_op->lookup)
+ return ERR_PTR(-ENOTDIR);
+ return do_file_open_root(dentry, mnt, filename, &op, lookup);
+}
+EXPORT_SYMBOL(file_open_root);
+
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
char *tmp = getname(filename);
int fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd_flags(flags);
if (fd >= 0) {
- struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
+ struct file *f = do_filp_open(dfd, tmp, &op, lookup);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
@@ -960,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
if (filp->f_op && filp->f_op->flush)
retval = filp->f_op->flush(filp, id);
- dnotify_flush(filp, id);
- locks_remove_posix(filp, id);
+ if (likely(!(filp->f_mode & FMODE_PATH))) {
+ dnotify_flush(filp, id);
+ locks_remove_posix(filp, id);
+ }
fput(filp);
return retval;
}
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca1..be03a0b08b4 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
#include "check.h"
#include "osf.h"
+#define MAX_OSF_PARTITIONS 8
+
int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
+ unsigned int npartitions;
Sector sect;
unsigned char *data;
struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
u8 p_fstype;
u8 p_frag;
__le16 p_cpg;
- } d_partitions[8];
+ } d_partitions[MAX_OSF_PARTITIONS];
} * label;
struct d_partition * partition;
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0;
}
- for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) {
+ npartitions = le16_to_cpu(label->d_npartitions);
+ if (npartitions > MAX_OSF_PARTITIONS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ for (i = 0 ; i < npartitions; i++, partition++) {
if (slot == state->limit)
break;
if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b20..d49c4b5d2c3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
&proc_self_inode_operations, NULL, {}),
};
-/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- */
-static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct task_struct *task;
-
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- if (task) {
- put_task_struct(task);
- return 1;
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations proc_base_dentry_operations =
-{
- .d_revalidate = proc_base_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
static struct dentry *proc_base_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68..d6a7ca1fdac 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
+ struct ctl_table_header *head;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
de = PROC_I(inode)->pde;
if (de)
pde_put(de);
- if (PROC_I(inode)->sysctl)
- sysctl_head_put(PROC_I(inode)->sysctl);
+ head = PROC_I(inode)->sysctl;
+ if (head) {
+ rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ sysctl_head_put(head);
+ }
}
struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34e..8eb2522111c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
const struct dentry *dentry, const struct inode *inode,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct ctl_table_header *head;
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
+ /* AV: can it, indeed? */
if (!inode)
- return 0;
+ return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(inode)->sysctl);
+ head = rcu_dereference(PROC_I(inode)->sysctl);
+ return !head || !sysctl_is_seen(head);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e..1bba24bad82 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
struct inode *inode = dentry->d_inode;
int maxlen = *lenp;
- if (maxlen < 3)
+ if (need_parent && (maxlen < 5)) {
+ *lenp = 5;
return 255;
+ } else if (maxlen < 3) {
+ *lenp = 3;
+ return 255;
+ }
data[0] = inode->i_ino;
data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 68fdf45cc6c..4b2eb564fda 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
reiserfs_write_unlock(dir->i_sb);
return -EMLINK;
}
- if (inode->i_nlink == 0) {
- reiserfs_write_unlock(dir->i_sb);
- return -ENOENT;
- }
/* inc before scheduling so reiserfs_unlink knows we are here */
inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e93364..5c11ca82b78 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
return -EPERM;
}
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b70..961039121cb 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+ AT_EMPTY_PATH)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flag & AT_NO_AUTOMOUNT)
lookup_flags |= LOOKUP_NO_AUTOMOUNT;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
if (bufsiz <= 0)
return -EINVAL;
- error = user_path_at(dfd, pathname, 0, &path);
+ error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
if (!error) {
struct inode *inode = path.dentry->d_inode;
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996..8244924dec5 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
}
EXPORT_SYMBOL(vfs_statfs);
-static int do_statfs_native(struct path *path, struct statfs *buf)
+int user_statfs(const char __user *pathname, struct kstatfs *st)
{
- struct kstatfs st;
- int retval;
+ struct path path;
+ int error = user_path(pathname, &path);
+ if (!error) {
+ error = vfs_statfs(&path, st);
+ path_put(&path);
+ }
+ return error;
+}
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
+int fd_statfs(int fd, struct kstatfs *st)
+{
+ struct file *file = fget(fd);
+ int error = -EBADF;
+ if (file) {
+ error = vfs_statfs(&file->f_path, st);
+ fput(file);
+ }
+ return error;
+}
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+{
+ struct statfs buf;
+
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- if (sizeof buf->f_blocks == 4) {
- if ((st.f_blocks | st.f_bfree | st.f_bavail |
- st.f_bsize | st.f_frsize) &
+ if (sizeof buf.f_blocks == 4) {
+ if ((st->f_blocks | st->f_bfree | st->f_bavail |
+ st->f_bsize | st->f_frsize) &
0xffffffff00000000ULL)
return -EOVERFLOW;
/*
* f_files and f_ffree may be -1; it's okay to stuff
* that into 32 bits
*/
- if (st.f_files != -1 &&
- (st.f_files & 0xffffffff00000000ULL))
+ if (st->f_files != -1 &&
+ (st->f_files & 0xffffffff00000000ULL))
return -EOVERFLOW;
- if (st.f_ffree != -1 &&
- (st.f_ffree & 0xffffffff00000000ULL))
+ if (st->f_ffree != -1 &&
+ (st->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
-static int do_statfs64(struct path *path, struct statfs64 *buf)
+static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+ struct statfs64 buf;
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs tmp;
- error = do_statfs_native(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
{
- struct path path;
- long error;
-
+ struct kstatfs st;
+ int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs64 tmp;
- error = do_statfs64(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
{
- struct file *file;
- struct statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_native(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
{
- struct file *file;
- struct statfs64 tmp;
+ struct kstatfs st;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs64(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7..7217d67a80a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- *
- * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
- * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
- * lock 'dirA->i_mutex', so this is possible. Both of the functions
- * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
- * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
- * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
- * to the list of orphans. After this, 'vfs_link()' will link
- * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
- * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
- * to the list of orphans.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
err = dbg_check_synced_i_size(inode);
if (err)
return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b7c338d5e9d..f1dce848ef9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
struct fid *fid = (struct fid *)fh;
int type = FILEID_UDF_WITHOUT_PARENT;
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *lenp = 5;
+ return 255;
+ } else if (len < 3) {
+ *lenp = 3;
return 255;
+ }
*lenp = 3;
fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fd..f4f878fc008 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
* seven combinations work. The real answer is "don't use v2".
*/
len = xfs_fileid_length(fileid_type);
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
*max_len = len;
switch (fileid_type) {
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 2bcc5c7c22a..61e03dd7939 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -30,6 +30,9 @@ typedef u64 cputime64_t;
#define cputime64_to_jiffies64(__ct) (__ct)
#define jiffies64_to_cputime64(__jif) (__jif)
#define cputime_to_cputime64(__ct) ((u64) __ct)
+#define cputime64_gt(__a, __b) ((__a) > (__b))
+
+#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
/*
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bf..84793c7025e 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
#define O_SYNC (__O_SYNC|O_DSYNC)
#endif
+#ifndef O_PATH
+#define O_PATH 010000000
+#endif
+
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
#endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3c2344f4813..01f227e1425 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,7 +6,7 @@
#include <asm/errno.h>
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -16,7 +16,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
return -ENOSYS;
}
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c..57af0338d27 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 263
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
+#define __NR_name_to_handle_at 264
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 265
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
#undef __NR_syscalls
-#define __NR_syscalls 264
+#define __NR_syscalls 266
/*
* All syscalls below here should go away really,
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 597692f1fc8..65970b811e2 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -34,7 +34,10 @@ struct debug_obj {
/**
* struct debug_obj_descr - object type specific debug description structure
+ *
* @name: name of the object typee
+ * @debug_hint: function returning address, which have associated
+ * kernel symbol, to allow identify the object
* @fixup_init: fixup function, which is called when the init check
* fails
* @fixup_activate: fixup function, which is called when the activate check
@@ -46,7 +49,7 @@ struct debug_obj {
*/
struct debug_obj_descr {
const char *name;
-
+ void *(*debug_hint) (void *addr);
int (*fixup_init) (void *addr, enum debug_obj_state state);
int (*fixup_activate) (void *addr, enum debug_obj_state state);
int (*fixup_destroy) (void *addr, enum debug_obj_state state);
diff --git a/include/linux/device.h b/include/linux/device.h
index 1bf5cf0b451..ca5d25225aa 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -128,9 +128,7 @@ struct device_driver {
bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
-#if defined(CONFIG_OF)
const struct of_device_id *of_match_table;
-#endif
int (*probe) (struct device *dev);
int (*remove) (struct device *dev);
@@ -441,9 +439,8 @@ struct device {
override */
/* arch specific additions */
struct dev_archdata archdata;
-#ifdef CONFIG_OF
- struct device_node *of_node;
-#endif
+
+ struct device_node *of_node; /* associated device tree node */
dev_t devt; /* dev_t, creates the sysfs "dev" */
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c86..33a42f24b27 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
struct super_block;
struct vfsmount;
+/* limit the handle size to NFSv4 handle size now */
+#define MAX_HANDLE_SZ 128
+
/*
* The fileid_type identifies how the file within the filesystem is encoded.
* In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
* set, the encode_fh() should store sufficient information so that a good
* attempt can be made to find not only the file but also it's place in the
* filesystem. This typically means storing a reference to de->d_parent in
- * the filehandle fragment. encode_fh() should return the number of bytes
- * stored or a negative error code such as %-ENOSPC
+ * the filehandle fragment. encode_fh() should return the fileid_type on
+ * success and on error returns 255 (if the space needed to encode fh is
+ * greater than @max_len*4 bytes). On error @max_len contains the minimum
+ * size(in 4 byte unit) needed to encode the file handle.
*
* fh_to_dentry:
* @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e..f550f894ba1 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
#ifdef __KERNEL__
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf627..21a79958541 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
extern struct file *fget(unsigned int fd);
extern struct file *fget_light(unsigned int fd, int *fput_needed);
+extern struct file *fget_raw(unsigned int fd);
+extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
extern void set_close_on_exec(unsigned int fd, int flag);
extern void put_filp(struct file *);
extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e38b50a4b9d..13df14e2c42 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
+/* File is opened with O_PATH; almost nothing can be done with it */
+#define FMODE_PATH ((__force fmode_t)0x4000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
@@ -978,6 +981,13 @@ struct file {
#endif
};
+struct file_handle {
+ __u32 handle_bytes;
+ int handle_type;
+ /* file identifier */
+ unsigned char f_handle[0];
+};
+
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1401,6 +1411,7 @@ struct super_block {
wait_queue_head_t s_wait_unfrozen;
char s_id[32]; /* Informational name */
+ u8 s_uuid[16]; /* UUID */
void *s_fs_info; /* Filesystem private info */
fmode_t s_mode;
@@ -1874,6 +1885,8 @@ extern void drop_collected_mounts(struct vfsmount *);
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
extern int vfs_statfs(struct path *, struct kstatfs *);
+extern int user_statfs(const char __user *, struct kstatfs *);
+extern int fd_statfs(int, struct kstatfs *);
extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
extern int freeze_super(struct super_block *super);
extern int thaw_super(struct super_block *super);
@@ -1990,6 +2003,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
extern long do_sys_open(int dfd, const char __user *filename, int flags,
int mode);
extern struct file *filp_open(const char *, int, int);
+extern struct file *file_open_root(struct dentry *, struct vfsmount *,
+ const char *, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
@@ -2205,10 +2220,6 @@ extern struct file *create_read_pipe(struct file *f, int flags);
extern struct file *create_write_pipe(int flags);
extern void free_write_pipe(struct file *);
-extern struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode);
-extern int may_open(struct path *, int, int);
-
extern int kernel_read(struct file *, loff_t, char *, unsigned long);
extern struct file * open_exec(const char *);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f376ddc64c4..62f500c724f 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -54,11 +54,13 @@ enum hrtimer_restart {
* 0x00 inactive
* 0x01 enqueued into rbtree
* 0x02 callback function running
+ * 0x04 timer is migrated to another cpu
*
* Special cases:
* 0x03 callback function running and enqueued
* (was requeued on another CPU)
- * 0x09 timer was migrated on CPU hotunplug
+ * 0x05 timer was migrated on CPU hotunplug
+ *
* The "callback function running and enqueued" status is only possible on
* SMP. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
@@ -67,8 +69,11 @@ enum hrtimer_restart {
* as otherwise the timer could be removed before the softirq code finishes the
* the handling of the timer.
*
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to
- * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario.
+ * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
+ * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
+ * also affects HRTIMER_STATE_MIGRATE where the preservation is not
+ * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
+ * enqueued on the new cpu.
*
* All state transitions are protected by cpu_base->lock.
*/
@@ -148,7 +153,12 @@ struct hrtimer_clock_base {
#endif
};
-#define HRTIMER_MAX_CLOCK_BASES 2
+enum hrtimer_base_type {
+ HRTIMER_BASE_REALTIME,
+ HRTIMER_BASE_MONOTONIC,
+ HRTIMER_BASE_BOOTTIME,
+ HRTIMER_MAX_CLOCK_BASES,
+};
/*
* struct hrtimer_cpu_base - the per cpu clock bases
@@ -308,6 +318,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
extern ktime_t ktime_get(void);
extern ktime_t ktime_get_real(void);
+extern ktime_t ktime_get_boottime(void);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
@@ -370,8 +381,9 @@ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
extern ktime_t hrtimer_get_next_event(void);
/*
- * A timer is active, when it is enqueued into the rbtree or the callback
- * function is running.
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
*/
static inline int hrtimer_active(const struct hrtimer *timer)
{
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 903576df88d..06a8d9c7de9 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -258,9 +258,7 @@ struct i2c_board_info {
unsigned short addr;
void *platform_data;
struct dev_archdata *archdata;
-#ifdef CONFIG_OF
struct device_node *of_node;
-#endif
int irq;
};
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d4253e4..59b72ca1c5d 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
@@ -55,7 +57,8 @@
* Used by threaded interrupts which need to keep the
* irq line disabled until the threaded handler has been run.
* IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
- *
+ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
+ * IRQF_NO_THREAD - Interrupt cannot be threaded
*/
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,22 +70,10 @@
#define IRQF_IRQPOLL 0x00001000
#define IRQF_ONESHOT 0x00002000
#define IRQF_NO_SUSPEND 0x00004000
+#define IRQF_FORCE_RESUME 0x00008000
+#define IRQF_NO_THREAD 0x00010000
-#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)
-
-/*
- * Bits used by threaded handlers:
- * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
- * IRQTF_DIED - handler thread died
- * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
- * IRQTF_AFFINITY - irq thread is requested to adjust affinity
- */
-enum {
- IRQTF_RUNTHREAD,
- IRQTF_DIED,
- IRQTF_WARNED,
- IRQTF_AFFINITY,
-};
+#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
/*
* These values can be returned by request_any_context_irq() and
@@ -110,6 +101,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @thread_fn: interupt handler function for threaded interrupts
* @thread: thread pointer for threaded interrupts
* @thread_flags: flags related to @thread
+ * @thread_mask: bitmask for keeping track of @thread activity
*/
struct irqaction {
irq_handler_t handler;
@@ -120,6 +112,7 @@ struct irqaction {
irq_handler_t thread_fn;
struct task_struct *thread;
unsigned long thread_flags;
+ unsigned long thread_mask;
const char *name;
struct proc_dir_entry *dir;
} ____cacheline_internodealigned_in_smp;
@@ -240,6 +233,35 @@ extern int irq_can_set_affinity(unsigned int irq);
extern int irq_select_affinity(unsigned int irq);
extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
+
+/**
+ * struct irq_affinity_notify - context for notification of IRQ affinity changes
+ * @irq: Interrupt to which notification applies
+ * @kref: Reference count, for internal use
+ * @work: Work item, for internal use
+ * @notify: Function to be called on change. This will be
+ * called in process context.
+ * @release: Function to be called on release. This will be
+ * called in process context. Once registered, the
+ * structure must only be freed when this function is
+ * called or later.
+ */
+struct irq_affinity_notify {
+ unsigned int irq;
+ struct kref kref;
+ struct work_struct work;
+ void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
+ void (*release)(struct kref *ref);
+};
+
+extern int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
+
+static inline void irq_run_affinity_notifiers(void)
+{
+ flush_scheduled_work();
+}
+
#else /* CONFIG_SMP */
static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -255,7 +277,7 @@ static inline int irq_can_set_affinity(unsigned int irq)
static inline int irq_select_affinity(unsigned int irq) { return 0; }
static inline int irq_set_affinity_hint(unsigned int irq,
- const struct cpumask *m)
+ const struct cpumask *m)
{
return -EINVAL;
}
@@ -314,16 +336,24 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long
}
/* IRQ wakeup (PM) control: */
-extern int set_irq_wake(unsigned int irq, unsigned int on);
+extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+/* Please do not use: Use the replacement functions instead */
+static inline int set_irq_wake(unsigned int irq, unsigned int on)
+{
+ return irq_set_irq_wake(irq, on);
+}
+#endif
static inline int enable_irq_wake(unsigned int irq)
{
- return set_irq_wake(irq, 1);
+ return irq_set_irq_wake(irq, 1);
}
static inline int disable_irq_wake(unsigned int irq)
{
- return set_irq_wake(irq, 0);
+ return irq_set_irq_wake(irq, 0);
}
#else /* !CONFIG_GENERIC_HARDIRQS */
@@ -353,6 +383,13 @@ static inline int disable_irq_wake(unsigned int irq)
}
#endif /* CONFIG_GENERIC_HARDIRQS */
+
+#ifdef CONFIG_IRQ_FORCED_THREADING
+extern bool force_irqthreads;
+#else
+#define force_irqthreads (0)
+#endif
+
#ifndef __ARCH_SET_SOFTIRQ_PENDING
#define set_softirq_pending(x) (local_softirq_pending() = (x))
#define or_softirq_pending(x) (local_softirq_pending() |= (x))
@@ -426,6 +463,13 @@ extern void raise_softirq(unsigned int nr);
*/
DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+
+static inline struct task_struct *this_cpu_ksoftirqd(void)
+{
+ return this_cpu_read(ksoftirqd);
+}
+
/* Try to send a softirq to a remote cpu. If this cannot be done, the
* work will be queued to the local cpu.
*/
@@ -645,6 +689,7 @@ static inline void init_irq_proc(void)
struct seq_file;
int show_interrupts(struct seq_file *p, void *v);
+int arch_show_interrupts(struct seq_file *p, int prec);
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80fcb53057b..1d3577f30d4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -29,61 +29,104 @@
#include <asm/irq_regs.h>
struct irq_desc;
+struct irq_data;
typedef void (*irq_flow_handler_t)(unsigned int irq,
struct irq_desc *desc);
-
+typedef void (*irq_preflow_handler_t)(struct irq_data *data);
/*
* IRQ line status.
*
- * Bits 0-7 are reserved for the IRQF_* bits in linux/interrupt.h
+ * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
+ *
+ * IRQ_TYPE_NONE - default, unspecified type
+ * IRQ_TYPE_EDGE_RISING - rising edge triggered
+ * IRQ_TYPE_EDGE_FALLING - falling edge triggered
+ * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered
+ * IRQ_TYPE_LEVEL_HIGH - high level triggered
+ * IRQ_TYPE_LEVEL_LOW - low level triggered
+ * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits
+ * IRQ_TYPE_SENSE_MASK - Mask for all the above bits
+ * IRQ_TYPE_PROBE - Special flag for probing in progress
+ *
+ * Bits which can be modified via irq_set/clear/modify_status_flags()
+ * IRQ_LEVEL - Interrupt is level type. Will be also
+ * updated in the code when the above trigger
+ * bits are modified via set_irq_type()
+ * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect
+ * it from affinity setting
+ * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing
+ * IRQ_NOREQUEST - Interrupt cannot be requested via
+ * request_irq()
+ * IRQ_NOAUTOEN - Interrupt is not automatically enabled in
+ * request/setup_irq()
+ * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
+ * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
+ * IRQ_NESTED_TRHEAD - Interrupt nests into another thread
+ *
+ * Deprecated bits. They are kept updated as long as
+ * CONFIG_GENERIC_HARDIRQS_NO_COMPAT is not set. Will go away soon. These bits
+ * are internal state of the core code and if you really need to acces
+ * them then talk to the genirq maintainer instead of hacking
+ * something weird.
*
- * IRQ types
*/
-#define IRQ_TYPE_NONE 0x00000000 /* Default, unspecified type */
-#define IRQ_TYPE_EDGE_RISING 0x00000001 /* Edge rising type */
-#define IRQ_TYPE_EDGE_FALLING 0x00000002 /* Edge falling type */
-#define IRQ_TYPE_EDGE_BOTH (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)
-#define IRQ_TYPE_LEVEL_HIGH 0x00000004 /* Level high type */
-#define IRQ_TYPE_LEVEL_LOW 0x00000008 /* Level low type */
-#define IRQ_TYPE_SENSE_MASK 0x0000000f /* Mask of the above */
-#define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */
-
-/* Internal flags */
-#define IRQ_INPROGRESS 0x00000100 /* IRQ handler active - do not enter! */
-#define IRQ_DISABLED 0x00000200 /* IRQ disabled - do not enter! */
-#define IRQ_PENDING 0x00000400 /* IRQ pending - replay on enable */
-#define IRQ_REPLAY 0x00000800 /* IRQ has been replayed but not acked yet */
-#define IRQ_AUTODETECT 0x00001000 /* IRQ is being autodetected */
-#define IRQ_WAITING 0x00002000 /* IRQ not yet seen - for autodetection */
-#define IRQ_LEVEL 0x00004000 /* IRQ level triggered */
-#define IRQ_MASKED 0x00008000 /* IRQ masked - shouldn't be seen again */
-#define IRQ_PER_CPU 0x00010000 /* IRQ is per CPU */
-#define IRQ_NOPROBE 0x00020000 /* IRQ is not valid for probing */
-#define IRQ_NOREQUEST 0x00040000 /* IRQ cannot be requested */
-#define IRQ_NOAUTOEN 0x00080000 /* IRQ will not be enabled on request irq */
-#define IRQ_WAKEUP 0x00100000 /* IRQ triggers system wakeup */
-#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
-#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
-#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
-#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
-#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
-#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
-#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */
-#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */
+enum {
+ IRQ_TYPE_NONE = 0x00000000,
+ IRQ_TYPE_EDGE_RISING = 0x00000001,
+ IRQ_TYPE_EDGE_FALLING = 0x00000002,
+ IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING),
+ IRQ_TYPE_LEVEL_HIGH = 0x00000004,
+ IRQ_TYPE_LEVEL_LOW = 0x00000008,
+ IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH),
+ IRQ_TYPE_SENSE_MASK = 0x0000000f,
+
+ IRQ_TYPE_PROBE = 0x00000010,
+
+ IRQ_LEVEL = (1 << 8),
+ IRQ_PER_CPU = (1 << 9),
+ IRQ_NOPROBE = (1 << 10),
+ IRQ_NOREQUEST = (1 << 11),
+ IRQ_NOAUTOEN = (1 << 12),
+ IRQ_NO_BALANCING = (1 << 13),
+ IRQ_MOVE_PCNTXT = (1 << 14),
+ IRQ_NESTED_THREAD = (1 << 15),
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+ IRQ_INPROGRESS = (1 << 16),
+ IRQ_REPLAY = (1 << 17),
+ IRQ_WAITING = (1 << 18),
+ IRQ_DISABLED = (1 << 19),
+ IRQ_PENDING = (1 << 20),
+ IRQ_MASKED = (1 << 21),
+ IRQ_MOVE_PENDING = (1 << 22),
+ IRQ_AFFINITY_SET = (1 << 23),
+ IRQ_WAKEUP = (1 << 24),
+#endif
+};
#define IRQF_MODIFY_MASK \
(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
- IRQ_PER_CPU)
+ IRQ_PER_CPU | IRQ_NESTED_THREAD)
-#ifdef CONFIG_IRQ_PER_CPU
-# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
-# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
-#else
-# define CHECK_IRQ_PER_CPU(var) 0
-# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING
-#endif
+#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
+
+static inline __deprecated bool CHECK_IRQ_PER_CPU(unsigned int status)
+{
+ return status & IRQ_PER_CPU;
+}
+
+/*
+ * Return value for chip->irq_set_affinity()
+ *
+ * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity
+ * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity
+ */
+enum {
+ IRQ_SET_MASK_OK = 0,
+ IRQ_SET_MASK_OK_NOCOPY,
+};
struct msi_desc;
@@ -91,6 +134,8 @@ struct msi_desc;
* struct irq_data - per irq and irq chip data passed down to chip functions
* @irq: interrupt number
* @node: node index useful for balancing
+ * @state_use_accessor: status information for irq chip functions.
+ * Use accessor functions to deal with it
* @chip: low level interrupt hardware access
* @handler_data: per-IRQ data for the irq_chip methods
* @chip_data: platform-specific per-chip private data for the chip
@@ -105,6 +150,7 @@ struct msi_desc;
struct irq_data {
unsigned int irq;
unsigned int node;
+ unsigned int state_use_accessors;
struct irq_chip *chip;
void *handler_data;
void *chip_data;
@@ -114,6 +160,80 @@ struct irq_data {
#endif
};
+/*
+ * Bit masks for irq_data.state
+ *
+ * IRQD_TRIGGER_MASK - Mask for the trigger type bits
+ * IRQD_SETAFFINITY_PENDING - Affinity setting is pending
+ * IRQD_NO_BALANCING - Balancing disabled for this IRQ
+ * IRQD_PER_CPU - Interrupt is per cpu
+ * IRQD_AFFINITY_SET - Interrupt affinity was set
+ * IRQD_LEVEL - Interrupt is level triggered
+ * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup
+ * from suspend
+ * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process
+ * context
+ */
+enum {
+ IRQD_TRIGGER_MASK = 0xf,
+ IRQD_SETAFFINITY_PENDING = (1 << 8),
+ IRQD_NO_BALANCING = (1 << 10),
+ IRQD_PER_CPU = (1 << 11),
+ IRQD_AFFINITY_SET = (1 << 12),
+ IRQD_LEVEL = (1 << 13),
+ IRQD_WAKEUP_STATE = (1 << 14),
+ IRQD_MOVE_PCNTXT = (1 << 15),
+};
+
+static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_SETAFFINITY_PENDING;
+}
+
+static inline bool irqd_is_per_cpu(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_PER_CPU;
+}
+
+static inline bool irqd_can_balance(struct irq_data *d)
+{
+ return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING));
+}
+
+static inline bool irqd_affinity_was_set(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_AFFINITY_SET;
+}
+
+static inline u32 irqd_get_trigger_type(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_TRIGGER_MASK;
+}
+
+/*
+ * Must only be called inside irq_chip.irq_set_type() functions.
+ */
+static inline void irqd_set_trigger_type(struct irq_data *d, u32 type)
+{
+ d->state_use_accessors &= ~IRQD_TRIGGER_MASK;
+ d->state_use_accessors |= type & IRQD_TRIGGER_MASK;
+}
+
+static inline bool irqd_is_level_type(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_LEVEL;
+}
+
+static inline bool irqd_is_wakeup_set(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_WAKEUP_STATE;
+}
+
+static inline bool irqd_can_move_in_process_context(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_MOVE_PCNTXT;
+}
+
/**
* struct irq_chip - hardware interrupt chip descriptor
*
@@ -150,6 +270,7 @@ struct irq_data {
* @irq_set_wake: enable/disable power-management wake-on of an IRQ
* @irq_bus_lock: function to lock access to slow bus (i2c) chips
* @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
+ * @flags: chip specific flags
*
* @release: release function solely used by UML
*/
@@ -196,12 +317,27 @@ struct irq_chip {
void (*irq_bus_lock)(struct irq_data *data);
void (*irq_bus_sync_unlock)(struct irq_data *data);
+ unsigned long flags;
+
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
void (*release)(unsigned int irq, void *dev_id);
#endif
};
+/*
+ * irq_chip specific flags
+ *
+ * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type()
+ * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled
+ * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path
+ */
+enum {
+ IRQCHIP_SET_TYPE_MASKED = (1 << 0),
+ IRQCHIP_EOI_IF_HANDLED = (1 << 1),
+ IRQCHIP_MASK_ON_SUSPEND = (1 << 2),
+};
+
/* This include will go away once we isolated irq_desc usage to core code */
#include <linux/irqdesc.h>
@@ -218,7 +354,7 @@ struct irq_chip {
# define ARCH_IRQ_INIT_FLAGS 0
#endif
-#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS)
+#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS
struct irqaction;
extern int setup_irq(unsigned int irq, struct irqaction *new);
@@ -229,9 +365,13 @@ extern void remove_irq(unsigned int irq, struct irqaction *act);
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
void move_native_irq(int irq);
void move_masked_irq(int irq);
+void irq_move_irq(struct irq_data *data);
+void irq_move_masked_irq(struct irq_data *data);
#else
static inline void move_native_irq(int irq) { }
static inline void move_masked_irq(int irq) { }
+static inline void irq_move_irq(struct irq_data *data) { }
+static inline void irq_move_masked_irq(struct irq_data *data) { }
#endif
extern int no_irq_affinity;
@@ -267,23 +407,23 @@ extern struct irq_chip no_irq_chip;
extern struct irq_chip dummy_irq_chip;
extern void
-set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
- irq_flow_handler_t handle);
-extern void
-set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle, const char *name);
+static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle)
+{
+ irq_set_chip_and_handler_name(irq, chip, handle, NULL);
+}
+
extern void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name);
-/*
- * Set a highlevel flow handler for a given IRQ:
- */
static inline void
-set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
+irq_set_handler(unsigned int irq, irq_flow_handler_t handle)
{
- __set_irq_handler(irq, handle, 0, NULL);
+ __irq_set_handler(irq, handle, 0, NULL);
}
/*
@@ -292,14 +432,11 @@ set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
* IRQ_NOREQUEST and IRQ_NOPROBE)
*/
static inline void
-set_irq_chained_handler(unsigned int irq,
- irq_flow_handler_t handle)
+irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
{
- __set_irq_handler(irq, handle, 1, NULL);
+ __irq_set_handler(irq, handle, 1, NULL);
}
-extern void set_irq_nested_thread(unsigned int irq, int nest);
-
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
@@ -312,16 +449,24 @@ static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
irq_modify_status(irq, clr, 0);
}
-static inline void set_irq_noprobe(unsigned int irq)
+static inline void irq_set_noprobe(unsigned int irq)
{
irq_modify_status(irq, 0, IRQ_NOPROBE);
}
-static inline void set_irq_probe(unsigned int irq)
+static inline void irq_set_probe(unsigned int irq)
{
irq_modify_status(irq, IRQ_NOPROBE, 0);
}
+static inline void irq_set_nested_thread(unsigned int irq, bool nest)
+{
+ if (nest)
+ irq_set_status_flags(irq, IRQ_NESTED_THREAD);
+ else
+ irq_clear_status_flags(irq, IRQ_NESTED_THREAD);
+}
+
/* Handle dynamic irq creation and destruction */
extern unsigned int create_irq_nr(unsigned int irq_want, int node);
extern int create_irq(void);
@@ -338,14 +483,14 @@ static inline void dynamic_irq_init(unsigned int irq)
}
/* Set/get chip/data for an IRQ: */
-extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
-extern int set_irq_data(unsigned int irq, void *data);
-extern int set_irq_chip_data(unsigned int irq, void *data);
-extern int set_irq_type(unsigned int irq, unsigned int type);
-extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
+extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
+extern int irq_set_handler_data(unsigned int irq, void *data);
+extern int irq_set_chip_data(unsigned int irq, void *data);
+extern int irq_set_irq_type(unsigned int irq, unsigned int type);
+extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
extern struct irq_data *irq_get_irq_data(unsigned int irq);
-static inline struct irq_chip *get_irq_chip(unsigned int irq)
+static inline struct irq_chip *irq_get_chip(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->chip : NULL;
@@ -356,7 +501,7 @@ static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
return d->chip;
}
-static inline void *get_irq_chip_data(unsigned int irq)
+static inline void *irq_get_chip_data(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->chip_data : NULL;
@@ -367,18 +512,18 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
return d->chip_data;
}
-static inline void *get_irq_data(unsigned int irq)
+static inline void *irq_get_handler_data(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->handler_data : NULL;
}
-static inline void *irq_data_get_irq_data(struct irq_data *d)
+static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
{
return d->handler_data;
}
-static inline struct msi_desc *get_irq_msi(unsigned int irq)
+static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->msi_desc : NULL;
@@ -389,6 +534,89 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
return d->msi_desc;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+/* Please do not use: Use the replacement functions instead */
+static inline int set_irq_chip(unsigned int irq, struct irq_chip *chip)
+{
+ return irq_set_chip(irq, chip);
+}
+static inline int set_irq_data(unsigned int irq, void *data)
+{
+ return irq_set_handler_data(irq, data);
+}
+static inline int set_irq_chip_data(unsigned int irq, void *data)
+{
+ return irq_set_chip_data(irq, data);
+}
+static inline int set_irq_type(unsigned int irq, unsigned int type)
+{
+ return irq_set_irq_type(irq, type);
+}
+static inline int set_irq_msi(unsigned int irq, struct msi_desc *entry)
+{
+ return irq_set_msi_desc(irq, entry);
+}
+static inline struct irq_chip *get_irq_chip(unsigned int irq)
+{
+ return irq_get_chip(irq);
+}
+static inline void *get_irq_chip_data(unsigned int irq)
+{
+ return irq_get_chip_data(irq);
+}
+static inline void *get_irq_data(unsigned int irq)
+{
+ return irq_get_handler_data(irq);
+}
+static inline void *irq_data_get_irq_data(struct irq_data *d)
+{
+ return irq_data_get_irq_handler_data(d);
+}
+static inline struct msi_desc *get_irq_msi(unsigned int irq)
+{
+ return irq_get_msi_desc(irq);
+}
+static inline void set_irq_noprobe(unsigned int irq)
+{
+ irq_set_noprobe(irq);
+}
+static inline void set_irq_probe(unsigned int irq)
+{
+ irq_set_probe(irq);
+}
+static inline void set_irq_nested_thread(unsigned int irq, int nest)
+{
+ irq_set_nested_thread(irq, nest);
+}
+static inline void
+set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle, const char *name)
+{
+ irq_set_chip_and_handler_name(irq, chip, handle, name);
+}
+static inline void
+set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle)
+{
+ irq_set_chip_and_handler(irq, chip, handle);
+}
+static inline void
+__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
+{
+ __irq_set_handler(irq, handle, is_chained, name);
+}
+static inline void set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
+{
+ irq_set_handler(irq, handle);
+}
+static inline void
+set_irq_chained_handler(unsigned int irq, irq_flow_handler_t handle)
+{
+ irq_set_chained_handler(irq, handle);
+}
+#endif
+
int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7b58d..00218371518 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
* For now it's included from <linux/irq.h>
*/
+struct irq_affinity_notify;
struct proc_dir_entry;
struct timer_rand_state;
/**
@@ -18,13 +19,16 @@ struct timer_rand_state;
* @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
* @action: the irq action chain
* @status: status information
+ * @core_internal_state__do_not_mess_with_it: core internal status information
* @depth: disable-depth, for nested irq_disable() calls
* @wake_depth: enable depth, for multiple set_irq_wake() callers
* @irq_count: stats field to detect stalled irqs
* @last_unhandled: aging timer for unhandled count
* @irqs_unhandled: stats field for spurious unhandled interrupts
* @lock: locking for SMP
+ * @affinity_notify: context for notification of affinity changes
* @pending_mask: pending rebalanced interrupts
+ * @threads_oneshot: bitfield to handle shared oneshot threads
* @threads_active: number of irqaction threads currently running
* @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
* @dir: /proc/irq/ procfs entry
@@ -45,6 +49,7 @@ struct irq_desc {
struct {
unsigned int irq;
unsigned int node;
+ unsigned int pad_do_not_even_think_about_it;
struct irq_chip *chip;
void *handler_data;
void *chip_data;
@@ -59,9 +64,16 @@ struct irq_desc {
struct timer_rand_state *timer_rand_state;
unsigned int __percpu *kstat_irqs;
irq_flow_handler_t handle_irq;
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+ irq_preflow_handler_t preflow_handler;
+#endif
struct irqaction *action; /* IRQ action list */
+#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+ unsigned int status_use_accessors;
+#else
unsigned int status; /* IRQ status */
-
+#endif
+ unsigned int core_internal_state__do_not_mess_with_it;
unsigned int depth; /* nested irq disables */
unsigned int wake_depth; /* nested wake enables */
unsigned int irq_count; /* For detecting broken IRQs */
@@ -70,10 +82,12 @@ struct irq_desc {
raw_spinlock_t lock;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
+ struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t pending_mask;
#endif
#endif
+ unsigned long threads_oneshot;
atomic_t threads_active;
wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
@@ -95,10 +109,51 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
#ifdef CONFIG_GENERIC_HARDIRQS
-#define get_irq_desc_chip(desc) ((desc)->irq_data.chip)
-#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data)
-#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data)
-#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc)
+static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
+{
+ return &desc->irq_data;
+}
+
+static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc)
+{
+ return desc->irq_data.chip;
+}
+
+static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
+{
+ return desc->irq_data.chip_data;
+}
+
+static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
+{
+ return desc->irq_data.handler_data;
+}
+
+static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
+{
+ return desc->irq_data.msi_desc;
+}
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+static inline struct irq_chip *get_irq_desc_chip(struct irq_desc *desc)
+{
+ return irq_desc_get_chip(desc);
+}
+static inline void *get_irq_desc_data(struct irq_desc *desc)
+{
+ return irq_desc_get_handler_data(desc);
+}
+
+static inline void *get_irq_desc_chip_data(struct irq_desc *desc)
+{
+ return irq_desc_get_chip_data(desc);
+}
+
+static inline struct msi_desc *get_irq_desc_msi(struct irq_desc *desc)
+{
+ return irq_desc_get_msi_desc(desc);
+}
+#endif
/*
* Architectures call this to let the generic IRQ layer
@@ -123,6 +178,7 @@ static inline int irq_has_action(unsigned int irq)
return desc->action != NULL;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
static inline int irq_balancing_disabled(unsigned int irq)
{
struct irq_desc *desc;
@@ -130,6 +186,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
desc = irq_to_desc(irq);
return desc->status & IRQ_NO_BALANCING_MASK;
}
+#endif
/* caller has locked the irq_desc and both params are valid */
static inline void __set_irq_handler_unlocked(int irq,
@@ -140,6 +197,17 @@ static inline void __set_irq_handler_unlocked(int irq,
desc = irq_to_desc(irq);
desc->handle_irq = handler;
}
+
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+static inline void
+__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ desc->preflow_handler = handler;
+}
+#endif
#endif
#endif
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6811f4bfc6e..922aa313c9f 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+extern u64 nsecs_to_jiffies64(u64 n);
extern unsigned long nsecs_to_jiffies(u64 n);
#define TIMESTAMP_SIZE 30
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c..7ff16f7d3ed 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
};
#define KTHREAD_WORKER_INIT(worker) { \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
.work_list = LIST_HEAD_INIT((worker).work_list), \
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6385fc17ad..679300c050f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1309,8 +1309,6 @@ int add_from_early_node_map(struct range *range, int az,
int nr_range, int nid);
u64 __init find_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit);
-void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
- u64 goal, u64 limit);
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01f..9c8603872c3 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
struct path path;
struct qstr last;
struct path root;
- struct file *file;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags;
unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
#define LOOKUP_EXCL 0x0400
#define LOOKUP_RENAME_TARGET 0x0800
+#define LOOKUP_JUMPED 0x1000
+#define LOOKUP_ROOT 0x2000
+#define LOOKUP_EMPTY 0x4000
+
extern int user_path_at(int, const char __user *, unsigned, struct path *);
#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
extern int kern_path(const char *, unsigned, struct path *);
-extern int path_lookup(const char *, unsigned, struct nameidata *);
+extern int kern_path_parent(const char *, struct nameidata *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct nameidata *);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b034..71caf7a5e6c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2392,6 +2392,9 @@ extern int netdev_notice(const struct net_device *dev, const char *format, ...)
extern int netdev_info(const struct net_device *dev, const char *format, ...)
__attribute__ ((format (printf, 2, 3)));
+#define MODULE_ALIAS_NETDEV(device) \
+ MODULE_ALIAS("netdev-" device)
+
#if defined(DEBUG)
#define netdev_dbg(__dev, format, args...) \
netdev_printk(KERN_DEBUG, __dev, format, ##args)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b197563913b..3e112de12d8 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -68,11 +68,7 @@ struct nfs_client {
unsigned char cl_id_uniquifier;
u32 cl_cb_ident; /* v4.0 callback identifier */
const struct nfs4_minor_version_ops *cl_mvops;
-#endif /* CONFIG_NFS_V4 */
-#ifdef CONFIG_NFS_V4_1
- /* clientid returned from EXCHANGE_ID, used by session operations */
- u64 cl_ex_clid;
/* The sequence id to use for the next CREATE_SESSION */
u32 cl_seqid;
/* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -80,7 +76,7 @@ struct nfs_client {
struct nfs4_session *cl_session; /* sharred session */
struct list_head cl_layouts;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
@@ -185,7 +181,7 @@ struct nfs_server {
/* maximum number of slots to use */
#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_NFS_V4)
/* Sessions */
#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
@@ -225,5 +221,5 @@ struct nfs4_session {
struct nfs_client *clp;
};
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_NFS_V4 */
#endif
diff --git a/include/linux/of.h b/include/linux/of.h
index cad7cf0ab27..266db1d0baa 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -23,8 +23,6 @@
#include <asm/byteorder.h>
-#ifdef CONFIG_OF
-
typedef u32 phandle;
typedef u32 ihandle;
@@ -65,11 +63,18 @@ struct device_node {
#endif
};
+#ifdef CONFIG_OF
+
/* Pointer for first entry in chain of all nodes. */
extern struct device_node *allnodes;
extern struct device_node *of_chosen;
extern rwlock_t devtree_lock;
+static inline bool of_have_populated_dt(void)
+{
+ return allnodes != NULL;
+}
+
static inline bool of_node_is_root(const struct device_node *node)
{
return node && (node->parent == NULL);
@@ -222,5 +227,12 @@ extern void of_attach_node(struct device_node *);
extern void of_detach_node(struct device_node *);
#endif
+#else
+
+static inline bool of_have_populated_dt(void)
+{
+ return false;
+}
+
#endif /* CONFIG_OF */
#endif /* _LINUX_OF_H */
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
new file mode 100644
index 00000000000..85a27b650d7
--- /dev/null
+++ b/include/linux/of_pci.h
@@ -0,0 +1,9 @@
+#ifndef __OF_PCI_H
+#define __OF_PCI_H
+
+#include <linux/pci.h>
+
+struct pci_dev;
+struct of_irq;
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
+#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3adb06ebf84..580de67f318 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 7254eda078e..c9b9f322c8d 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -31,15 +31,17 @@
*
* Simple ASCII art explanation:
*
- * |HEAD |
- * | |
- * |prio_list.prev|<------------------------------------|
- * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-|
- * |10 | |10| |21| |21| |21| |40| (prio)
- * | | | | | | | | | | | |
- * | | | | | | | | | | | |
- * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
- * |node_list.prev|<------------------------------------|
+ * pl:prio_list (only for plist_node)
+ * nl:node_list
+ * HEAD| NODE(S)
+ * |
+ * ||------------------------------------|
+ * ||->|pl|<->|pl|<--------------->|pl|<-|
+ * | |10| |21| |21| |21| |40| (prio)
+ * | | | | | | | | | | |
+ * | | | | | | | | | | |
+ * |->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
+ * |-------------------------------------------|
*
* The nodes on the prio_list list are sorted by priority to simplify
* the insertion of new nodes. There are no nodes with duplicate
@@ -78,7 +80,6 @@
#include <linux/spinlock_types.h>
struct plist_head {
- struct list_head prio_list;
struct list_head node_list;
#ifdef CONFIG_DEBUG_PI_LIST
raw_spinlock_t *rawlock;
@@ -88,7 +89,8 @@ struct plist_head {
struct plist_node {
int prio;
- struct plist_head plist;
+ struct list_head prio_list;
+ struct list_head node_list;
};
#ifdef CONFIG_DEBUG_PI_LIST
@@ -100,7 +102,6 @@ struct plist_node {
#endif
#define _PLIST_HEAD_INIT(head) \
- .prio_list = LIST_HEAD_INIT((head).prio_list), \
.node_list = LIST_HEAD_INIT((head).node_list)
/**
@@ -133,7 +134,8 @@ struct plist_node {
#define PLIST_NODE_INIT(node, __prio) \
{ \
.prio = (__prio), \
- .plist = { _PLIST_HEAD_INIT((node).plist) }, \
+ .prio_list = LIST_HEAD_INIT((node).prio_list), \
+ .node_list = LIST_HEAD_INIT((node).node_list), \
}
/**
@@ -144,7 +146,6 @@ struct plist_node {
static inline void
plist_head_init(struct plist_head *head, spinlock_t *lock)
{
- INIT_LIST_HEAD(&head->prio_list);
INIT_LIST_HEAD(&head->node_list);
#ifdef CONFIG_DEBUG_PI_LIST
head->spinlock = lock;
@@ -160,7 +161,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
static inline void
plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
{
- INIT_LIST_HEAD(&head->prio_list);
INIT_LIST_HEAD(&head->node_list);
#ifdef CONFIG_DEBUG_PI_LIST
head->rawlock = lock;
@@ -176,7 +176,8 @@ plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
static inline void plist_node_init(struct plist_node *node, int prio)
{
node->prio = prio;
- plist_head_init(&node->plist, NULL);
+ INIT_LIST_HEAD(&node->prio_list);
+ INIT_LIST_HEAD(&node->node_list);
}
extern void plist_add(struct plist_node *node, struct plist_head *head);
@@ -188,7 +189,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* @head: the head for your list
*/
#define plist_for_each(pos, head) \
- list_for_each_entry(pos, &(head)->node_list, plist.node_list)
+ list_for_each_entry(pos, &(head)->node_list, node_list)
/**
* plist_for_each_safe - iterate safely over a plist of given type
@@ -199,7 +200,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* Iterate over a plist of given type, safe against removal of list entry.
*/
#define plist_for_each_safe(pos, n, head) \
- list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list)
+ list_for_each_entry_safe(pos, n, &(head)->node_list, node_list)
/**
* plist_for_each_entry - iterate over list of given type
@@ -208,7 +209,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* @mem: the name of the list_struct within the struct
*/
#define plist_for_each_entry(pos, head, mem) \
- list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list)
+ list_for_each_entry(pos, &(head)->node_list, mem.node_list)
/**
* plist_for_each_entry_safe - iterate safely over list of given type
@@ -220,7 +221,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* Iterate over list of given type, safe against removal of list entry.
*/
#define plist_for_each_entry_safe(pos, n, head, m) \
- list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list)
+ list_for_each_entry_safe(pos, n, &(head)->node_list, m.node_list)
/**
* plist_head_empty - return !0 if a plist_head is empty
@@ -237,7 +238,7 @@ static inline int plist_head_empty(const struct plist_head *head)
*/
static inline int plist_node_empty(const struct plist_node *node)
{
- return plist_head_empty(&node->plist);
+ return list_empty(&node->node_list);
}
/* All functions below assume the plist_head is not empty. */
@@ -285,7 +286,7 @@ static inline int plist_node_empty(const struct plist_node *node)
static inline struct plist_node *plist_first(const struct plist_head *head)
{
return list_entry(head->node_list.next,
- struct plist_node, plist.node_list);
+ struct plist_node, node_list);
}
/**
@@ -297,7 +298,7 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
static inline struct plist_node *plist_last(const struct plist_head *head)
{
return list_entry(head->node_list.prev,
- struct plist_node, plist.node_list);
+ struct plist_node, node_list);
}
#endif
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
new file mode 100644
index 00000000000..369e19d3750
--- /dev/null
+++ b/include/linux/posix-clock.h
@@ -0,0 +1,150 @@
+/*
+ * posix-clock.h - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_POSIX_CLOCK_H_
+#define _LINUX_POSIX_CLOCK_H_
+
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/posix-timers.h>
+
+struct posix_clock;
+
+/**
+ * struct posix_clock_operations - functional interface to the clock
+ *
+ * Every posix clock is represented by a character device. Drivers may
+ * optionally offer extended capabilities by implementing the
+ * character device methods. The character device file operations are
+ * first handled by the clock device layer, then passed on to the
+ * driver by calling these functions.
+ *
+ * @owner: The clock driver should set to THIS_MODULE
+ * @clock_adjtime: Adjust the clock
+ * @clock_gettime: Read the current time
+ * @clock_getres: Get the clock resolution
+ * @clock_settime: Set the current time value
+ * @timer_create: Create a new timer
+ * @timer_delete: Remove a previously created timer
+ * @timer_gettime: Get remaining time and interval of a timer
+ * @timer_setttime: Set a timer's initial expiration and interval
+ * @fasync: Optional character device fasync method
+ * @mmap: Optional character device mmap method
+ * @open: Optional character device open method
+ * @release: Optional character device release method
+ * @ioctl: Optional character device ioctl method
+ * @read: Optional character device read method
+ * @poll: Optional character device poll method
+ */
+struct posix_clock_operations {
+ struct module *owner;
+
+ int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+
+ int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_settime)(struct posix_clock *pc,
+ const struct timespec *ts);
+
+ int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
+
+ int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
+
+ void (*timer_gettime)(struct posix_clock *pc,
+ struct k_itimer *kit, struct itimerspec *tsp);
+
+ int (*timer_settime)(struct posix_clock *pc,
+ struct k_itimer *kit, int flags,
+ struct itimerspec *tsp, struct itimerspec *old);
+ /*
+ * Optional character device methods:
+ */
+ int (*fasync) (struct posix_clock *pc,
+ int fd, struct file *file, int on);
+
+ long (*ioctl) (struct posix_clock *pc,
+ unsigned int cmd, unsigned long arg);
+
+ int (*mmap) (struct posix_clock *pc,
+ struct vm_area_struct *vma);
+
+ int (*open) (struct posix_clock *pc, fmode_t f_mode);
+
+ uint (*poll) (struct posix_clock *pc,
+ struct file *file, poll_table *wait);
+
+ int (*release) (struct posix_clock *pc);
+
+ ssize_t (*read) (struct posix_clock *pc,
+ uint flags, char __user *buf, size_t cnt);
+};
+
+/**
+ * struct posix_clock - represents a dynamic posix clock
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+ * @kref: Reference count.
+ * @mutex: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+ * @release: A function to free the structure when the reference count reaches
+ * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
+ */
+struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+ struct kref kref;
+ struct mutex mutex;
+ bool zombie;
+ void (*release)(struct posix_clock *clk);
+};
+
+/**
+ * posix_clock_register() - register a new clock
+ * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+ * @devid: Allocated device id
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+ * memory, then the caller must provide a 'release' function to free
+ * that memory.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+int posix_clock_register(struct posix_clock *clk, dev_t devid);
+
+/**
+ * posix_clock_unregister() - unregister a clock
+ * @clk: Clock instance previously registered via posix_clock_register()
+ *
+ * A clock driver calls this function to remove itself from the clock
+ * device subsystem. The posix_clock itself will remain (in an
+ * inactive state) until its reference count drops to zero, at which
+ * point it will be deallocated with its 'release' method.
+ */
+void posix_clock_unregister(struct posix_clock *clk);
+
+#endif
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844a699..d51243ae072 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/timex.h>
union cpu_time_count {
cputime_t cpu;
@@ -17,10 +18,21 @@ struct cpu_timer_list {
int firing;
};
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock) \
(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-#define CPUCLOCK_PID_MASK 7
+
#define CPUCLOCK_PERTHREAD_MASK 4
#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
#define CPUCLOCK_CLOCK_MASK 3
@@ -28,12 +40,17 @@ struct cpu_timer_list {
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define CPUCLOCK_MAX 3
+#define CLOCKFD CPUCLOCK_MAX
+#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define MAKE_THREAD_CPUCLOCK(tid, clock) \
MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
+#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
+#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
+
/* POSIX.1b interval timer structure. */
struct k_itimer {
struct list_head list; /* free/ allocate list */
@@ -67,10 +84,11 @@ struct k_itimer {
};
struct k_clock {
- int res; /* in nanoseconds */
int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
- int (*clock_set) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_set) (const clockid_t which_clock,
+ const struct timespec *tp);
int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
struct timespec *, struct timespec __user *);
@@ -84,28 +102,14 @@ struct k_clock {
struct itimerspec * cur_setting);
};
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock);
+extern struct k_clock clock_posix_cpu;
+extern struct k_clock clock_posix_dynamic;
-/* error handlers for timer_create, nanosleep and settime */
-int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
- struct timespec __user *);
-int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
+void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
/* function to call to trigger timer event */
int posix_timer_event(struct k_itimer *timr, int si_private);
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
-int posix_cpu_timer_create(struct k_itimer *timer);
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp);
-long posix_cpu_nsleep_restart(struct restart_block *restart_block);
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old);
-int posix_cpu_timer_del(struct k_itimer *timer);
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
-
void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 89c3e518299..2ca7e8a7806 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,7 +133,6 @@ extern struct class *rtc_class;
* The (current) exceptions are mostly filesystem hooks:
* - the proc() hook for procfs
* - non-ioctl() chardev hooks: open(), release(), read_callback()
- * - periodic irq calls: irq_set_state(), irq_set_freq()
*
* REVISIT those periodic irq calls *do* have ops_lock when they're
* issued through ioctl() ...
@@ -148,11 +147,8 @@ struct rtc_class_ops {
int (*set_alarm)(struct device *, struct rtc_wkalrm *);
int (*proc)(struct device *, struct seq_file *);
int (*set_mmss)(struct device *, unsigned long secs);
- int (*irq_set_state)(struct device *, int enabled);
- int (*irq_set_freq)(struct device *, int freq);
int (*read_callback)(struct device *, int data);
int (*alarm_irq_enable)(struct device *, unsigned int enabled);
- int (*update_irq_enable)(struct device *, unsigned int enabled);
};
#define RTC_DEVICE_NAME_SIZE 20
@@ -227,6 +223,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
+int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
extern int rtc_set_alarm(struct rtc_device *rtc,
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8..cc0072e93e3 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
RW_DEP_MAP_INIT(lockname) }
#endif
-/*
- * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
- * deprecated.
- *
- * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
- */
-#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
-
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc252797..34701241b67 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
#endif
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
#ifdef __KERNEL__
-
-#include <linux/types.h>
-
-struct rwsem_waiter;
-
/*
* the rw-semaphore definition
* - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
#endif
};
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
+#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca..a8afe9cd000 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
#include <asm/system.h>
#include <asm/atomic.h>
@@ -19,9 +22,57 @@ struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
#else
-#include <asm/rwsem.h> /* use an arch-specific implementation */
+/* All arch specific implementations share the same struct */
+struct rw_semaphore {
+ long count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
+/* Include the arch specific part */
+#include <asm/rwsem.h>
+
+/* In all implementations count != 0 means locked */
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return sem->count != 0;
+}
+
+#endif
+
+/* Common initializer macros and functions */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
+ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key);
+
+#define init_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __init_rwsem((sem), #sem, &__key); \
+} while (0)
+
/*
* lock for reading
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c57e5278df8..c15936fe998 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1058,6 +1058,7 @@ struct sched_class {
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
+ bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
@@ -1084,12 +1085,10 @@ struct sched_class {
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_fork) (struct task_struct *p);
- void (*switched_from) (struct rq *this_rq, struct task_struct *task,
- int running);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task,
- int running);
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
- int oldprio, int running);
+ int oldprio);
unsigned int (*get_rr_interval) (struct rq *rq,
struct task_struct *task);
@@ -1715,7 +1714,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
/*
* Per process flags
*/
-#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
@@ -1945,8 +1943,6 @@ int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
-extern unsigned int sysctl_sched_compat_yield;
-
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
@@ -1977,6 +1973,7 @@ static inline int rt_mutex_getprio(struct task_struct *p)
# define rt_mutex_adjust_pi(p) do { } while (0)
#endif
+extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
extern int task_nice(const struct task_struct *p);
@@ -2049,7 +2046,7 @@ extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
-extern void do_timer(unsigned long ticks);
+extern void xtime_update(unsigned long ticks);
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
diff --git a/include/linux/security.h b/include/linux/security.h
index b2b7f9749f5..debbd97db7a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,7 +53,7 @@ struct audit_krule;
*/
extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
-extern int cap_settime(struct timespec *ts, struct timezone *tz);
+extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
extern int cap_ptrace_traceme(struct task_struct *parent);
extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
@@ -1387,7 +1387,7 @@ struct security_operations {
int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
int (*quota_on) (struct dentry *dentry);
int (*syslog) (int type);
- int (*settime) (struct timespec *ts, struct timezone *tz);
+ int (*settime) (const struct timespec *ts, const struct timezone *tz);
int (*vm_enough_memory) (struct mm_struct *mm, long pages);
int (*bprm_set_creds) (struct linux_binprm *bprm);
@@ -1669,7 +1669,7 @@ int security_sysctl(struct ctl_table *table, int op);
int security_quotactl(int cmds, int type, int id, struct super_block *sb);
int security_quota_on(struct dentry *dentry);
int security_syslog(int type);
-int security_settime(struct timespec *ts, struct timezone *tz);
+int security_settime(const struct timespec *ts, const struct timezone *tz);
int security_vm_enough_memory(long pages);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
int security_vm_enough_memory_kern(long pages);
@@ -1904,7 +1904,8 @@ static inline int security_syslog(int type)
return 0;
}
-static inline int security_settime(struct timespec *ts, struct timezone *tz)
+static inline int security_settime(const struct timespec *ts,
+ const struct timezone *tz)
{
return cap_settime(ts, tz);
}
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720..73548eb13a5 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
-/*
- * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
- * deprecated.
- * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
- * appropriate.
- */
-#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
-
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
#include <linux/rwlock_types.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 88513fd8e20..d81db8012c6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
const struct rpc_call_ops *ops);
void rpc_put_task(struct rpc_task *);
+void rpc_put_task_async(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a17fcea2ca5..1f5c18e6f4f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
+struct file_handle;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -315,6 +316,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
const struct timespec __user *tp);
asmlinkage long sys_clock_gettime(clockid_t which_clock,
struct timespec __user *tp);
+asmlinkage long sys_clock_adjtime(clockid_t which_clock,
+ struct timex __user *tx);
asmlinkage long sys_clock_getres(clockid_t which_clock,
struct timespec __user *tp);
asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
@@ -834,5 +837,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
-
+asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
+ struct file_handle __user *handle,
+ int __user *mnt_id, int flag);
+asmlinkage long sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle,
+ int flags);
#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb64f3b..11684d9e6bd 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,6 +930,7 @@ enum
#ifdef __KERNEL__
#include <linux/list.h>
+#include <linux/rcupdate.h>
/* For the /proc/sys support */
struct ctl_table;
@@ -1037,10 +1038,15 @@ struct ctl_table_root {
struct ctl_table trees. */
struct ctl_table_header
{
- struct ctl_table *ctl_table;
- struct list_head ctl_entry;
- int used;
- int count;
+ union {
+ struct {
+ struct ctl_table *ctl_table;
+ struct list_head ctl_entry;
+ int used;
+ int count;
+ };
+ struct rcu_head rcu;
+ };
struct completion *unregistering;
struct ctl_table *ctl_table_arg;
struct ctl_table_root *root;
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c9069654417..20fc303947d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -18,9 +18,6 @@ struct compat_timespec;
struct restart_block {
long (*fn)(struct restart_block *);
union {
- struct {
- unsigned long arg0, arg1, arg2, arg3;
- };
/* For futex_wait and futex_wait_requeue_pi */
struct {
u32 __user *uaddr;
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238..454a2620578 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
#define timespec_valid(ts) \
(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
-extern seqlock_t xtime_lock;
-
extern void read_persistent_clock(struct timespec *ts);
extern void read_boot_clock(struct timespec *ts);
extern int update_persistent_clock(struct timespec now);
@@ -125,8 +123,9 @@ extern int timekeeping_suspended;
unsigned long get_seconds(void);
struct timespec current_kernel_time(void);
struct timespec __current_kernel_time(void); /* does not take xtime_lock */
-struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
struct timespec get_monotonic_coarse(void);
+void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
+ struct timespec *wtom, struct timespec *sleep);
#define CURRENT_TIME (current_kernel_time())
#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,8 +146,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
extern void do_gettimeofday(struct timeval *tv);
-extern int do_settimeofday(struct timespec *tv);
-extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
+extern int do_settimeofday(const struct timespec *tv);
+extern int do_sys_settimeofday(const struct timespec *tv,
+ const struct timezone *tz);
#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
struct itimerval;
@@ -162,12 +162,13 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
struct timespec *ts_real);
extern void getboottime(struct timespec *ts);
extern void monotonic_to_bootbased(struct timespec *ts);
+extern void get_monotonic_boottime(struct timespec *ts);
extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
-extern void update_wall_time(void);
extern void timekeeping_leap_insert(int leapsecond);
+extern int timekeeping_inject_offset(struct timespec *ts);
struct tms;
extern void do_sys_times(struct tms *);
@@ -292,6 +293,7 @@ struct itimerval {
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
/*
* The IDs of various hardware clocks:
diff --git a/include/linux/timex.h b/include/linux/timex.h
index d23999f9499..aa60fe7b6ed 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
long tolerance; /* clock frequency tolerance (ppm)
* (read only)
*/
- struct timeval time; /* (read only) */
+ struct timeval time; /* (read only, except for ADJ_SETOFFSET) */
long tick; /* (modified) usecs between clock ticks */
long ppsfreq; /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
#define ADJ_STATUS 0x0010 /* clock status */
#define ADJ_TIMECONST 0x0020 /* pll time constant */
#define ADJ_TAI 0x0080 /* set TAI offset */
+#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
#define ADJ_MICRO 0x1000 /* select microsecond resolution */
#define ADJ_NANO 0x2000 /* select nanosecond resolution */
#define ADJ_TICK 0x4000 /* tick value */
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 24694051157..2e8ec51f061 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h
@@ -135,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int);
extern void transport_add_task_to_execute_queue(struct se_task *,
struct se_task *,
struct se_device *);
+extern void transport_remove_task_from_execute_queue(struct se_task *,
+ struct se_device *);
unsigned char *transport_dump_cmd_direction(struct se_cmd *);
extern void transport_dump_dev_state(struct se_device *, char *, int *);
extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53ddcc06..962da2ced5b 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
#ifdef CONFIG_PCI_MSI
-/* Allocate an irq and a pirq to be used with MSIs. */
-#define XEN_ALLOC_PIRQ (1 << 0)
-#define XEN_ALLOC_IRQ (1 << 1)
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int vector, const char *name);
#endif
/* De-allocates the above mentioned physical interrupt. */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4dc1e..61e523af3c4 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
-struct blkif_request {
- uint8_t operation; /* BLKIF_OP_??? */
- uint8_t nr_segments; /* number of segments */
- blkif_vdev_t handle; /* only for read/write requests */
- uint64_t id; /* private guest value, echoed in resp */
+struct blkif_request_rw {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
+struct blkif_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ union {
+ struct blkif_request_rw rw;
+ } u;
+};
+
struct blkif_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4
+/* Xen-defined major numbers for virtual disks, they look strangely
+ * familiar */
+#define XEN_IDE0_MAJOR 3
+#define XEN_IDE1_MAJOR 22
+#define XEN_SCSI_DISK0_MAJOR 8
+#define XEN_SCSI_DISK1_MAJOR 65
+#define XEN_SCSI_DISK2_MAJOR 66
+#define XEN_SCSI_DISK3_MAJOR 67
+#define XEN_SCSI_DISK4_MAJOR 68
+#define XEN_SCSI_DISK5_MAJOR 69
+#define XEN_SCSI_DISK6_MAJOR 70
+#define XEN_SCSI_DISK7_MAJOR 71
+#define XEN_SCSI_DISK8_MAJOR 128
+#define XEN_SCSI_DISK9_MAJOR 129
+#define XEN_SCSI_DISK10_MAJOR 130
+#define XEN_SCSI_DISK11_MAJOR 131
+#define XEN_SCSI_DISK12_MAJOR 132
+#define XEN_SCSI_DISK13_MAJOR 133
+#define XEN_SCSI_DISK14_MAJOR 134
+#define XEN_SCSI_DISK15_MAJOR 135
+
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e2f1b..b33257bc7e8 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
#define __HYPERVISOR_stack_switch 3
#define __HYPERVISOR_set_callbacks 4
#define __HYPERVISOR_fpu_taskswitch 5
-#define __HYPERVISOR_sched_op 6
+#define __HYPERVISOR_sched_op_compat 6
#define __HYPERVISOR_dom0_op 7
#define __HYPERVISOR_set_debugreg 8
#define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
#define __HYPERVISOR_nmi_op 28
-#define __HYPERVISOR_sched_op_new 29
+#define __HYPERVISOR_sched_op 29
#define __HYPERVISOR_callback_op 30
#define __HYPERVISOR_xenoprof_op 31
#define __HYPERVISOR_event_channel_op 32
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b92154a26..03c85d7387f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
-void xen_pre_suspend(void);
-void xen_post_suspend(int suspend_cancelled);
-void xen_hvm_post_suspend(int suspend_cancelled);
+void xen_arch_pre_suspend(void);
+void xen_arch_post_suspend(int suspend_cancelled);
+void xen_arch_hvm_post_suspend(int suspend_cancelled);
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
diff --git a/init/Kconfig b/init/Kconfig
index 4c4edf2ec4a..5721d27af62 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.gnu.org/software/acct/>.
+config FHANDLE
+ bool "open by fhandle syscalls"
+ select EXPORTFS
+ help
+ If you say Y here, a user level program will be able to map
+ file names to handle and then later use the handle for
+ different file system operations. This is useful in implementing
+ userspace file servers, which now track files using handles instead
+ of names. The handle would remain the same even if file names
+ get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
+ syscalls.
+
config TASKSTATS
bool "Export task/process statistics through netlink (EXPERIMENTAL)"
depends on NET
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c786646..e683869365d 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
}
/* Initialize a parent watch entry. */
-static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+static struct audit_parent *audit_init_parent(struct path *path)
{
- struct inode *inode = ndp->path.dentry->d_inode;
+ struct inode *inode = path->dentry->d_inode;
struct audit_parent *parent;
int ret;
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
}
/* Get path information necessary for adding watches. */
-static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
+static int audit_get_nd(struct audit_watch *watch, struct path *parent)
{
- struct nameidata *ndparent, *ndwatch;
+ struct nameidata nd;
+ struct dentry *d;
int err;
- ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
- if (unlikely(!ndparent))
- return -ENOMEM;
+ err = kern_path_parent(watch->path, &nd);
+ if (err)
+ return err;
- ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
- if (unlikely(!ndwatch)) {
- kfree(ndparent);
- return -ENOMEM;
+ if (nd.last_type != LAST_NORM) {
+ path_put(&nd.path);
+ return -EINVAL;
}
- err = path_lookup(path, LOOKUP_PARENT, ndparent);
- if (err) {
- kfree(ndparent);
- kfree(ndwatch);
- return err;
+ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
+ if (IS_ERR(d)) {
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ path_put(&nd.path);
+ return PTR_ERR(d);
}
-
- err = path_lookup(path, 0, ndwatch);
- if (err) {
- kfree(ndwatch);
- ndwatch = NULL;
+ if (d->d_inode) {
+ /* update watch filter fields */
+ watch->dev = d->d_inode->i_sb->s_dev;
+ watch->ino = d->d_inode->i_ino;
}
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- *ndp = ndparent;
- *ndw = ndwatch;
-
+ *parent = nd.path;
+ dput(d);
return 0;
}
-/* Release resources used for watch path information. */
-static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
-{
- if (ndp) {
- path_put(&ndp->path);
- kfree(ndp);
- }
- if (ndw) {
- path_put(&ndw->path);
- kfree(ndw);
- }
-}
-
/* Associate the given rule with an existing parent.
* Caller must hold audit_filter_mutex. */
static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
{
struct audit_watch *watch = krule->watch;
struct audit_parent *parent;
- struct nameidata *ndp = NULL, *ndw = NULL;
+ struct path parent_path;
int h, ret = 0;
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
- ret = audit_get_nd(watch->path, &ndp, &ndw);
- if (ret) {
- /* caller expects mutex locked */
- mutex_lock(&audit_filter_mutex);
- goto error;
- }
+ ret = audit_get_nd(watch, &parent_path);
+ /* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- /* update watch filter fields */
- if (ndw) {
- watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
- watch->ino = ndw->path.dentry->d_inode->i_ino;
- }
+ if (ret)
+ return ret;
/* either find an old parent or attach a new one */
- parent = audit_find_parent(ndp->path.dentry->d_inode);
+ parent = audit_find_parent(parent_path.dentry->d_inode);
if (!parent) {
- parent = audit_init_parent(ndp);
+ parent = audit_init_parent(&parent_path);
if (IS_ERR(parent)) {
ret = PTR_ERR(parent);
goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
h = audit_hash_ino((u32)watch->ino);
*list = &audit_inode_hash[h];
error:
- audit_put_nd(ndp, ndw); /* NULL args OK */
+ path_put(&parent_path);
return ret;
-
}
void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0b34a..38b1d2c1cbe 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
}
+static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
+{
+ memset(txc, 0, sizeof(struct timex));
+
+ if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
+ __get_user(txc->modes, &utp->modes) ||
+ __get_user(txc->offset, &utp->offset) ||
+ __get_user(txc->freq, &utp->freq) ||
+ __get_user(txc->maxerror, &utp->maxerror) ||
+ __get_user(txc->esterror, &utp->esterror) ||
+ __get_user(txc->status, &utp->status) ||
+ __get_user(txc->constant, &utp->constant) ||
+ __get_user(txc->precision, &utp->precision) ||
+ __get_user(txc->tolerance, &utp->tolerance) ||
+ __get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc->tick, &utp->tick) ||
+ __get_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc->jitter, &utp->jitter) ||
+ __get_user(txc->shift, &utp->shift) ||
+ __get_user(txc->stabil, &utp->stabil) ||
+ __get_user(txc->jitcnt, &utp->jitcnt) ||
+ __get_user(txc->calcnt, &utp->calcnt) ||
+ __get_user(txc->errcnt, &utp->errcnt) ||
+ __get_user(txc->stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
+{
+ if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
+ __put_user(txc->modes, &utp->modes) ||
+ __put_user(txc->offset, &utp->offset) ||
+ __put_user(txc->freq, &utp->freq) ||
+ __put_user(txc->maxerror, &utp->maxerror) ||
+ __put_user(txc->esterror, &utp->esterror) ||
+ __put_user(txc->status, &utp->status) ||
+ __put_user(txc->constant, &utp->constant) ||
+ __put_user(txc->precision, &utp->precision) ||
+ __put_user(txc->tolerance, &utp->tolerance) ||
+ __put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc->tick, &utp->tick) ||
+ __put_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc->jitter, &utp->jitter) ||
+ __put_user(txc->shift, &utp->shift) ||
+ __put_user(txc->stabil, &utp->stabil) ||
+ __put_user(txc->jitcnt, &utp->jitcnt) ||
+ __put_user(txc->calcnt, &utp->calcnt) ||
+ __put_user(txc->errcnt, &utp->errcnt) ||
+ __put_user(txc->stbcnt, &utp->stbcnt) ||
+ __put_user(txc->tai, &utp->tai))
+ return -EFAULT;
+ return 0;
+}
+
asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz)
{
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
return err;
}
+long compat_sys_clock_adjtime(clockid_t which_clock,
+ struct compat_timex __user *utp)
+{
+ struct timex txc;
+ mm_segment_t oldfs;
+ int err, ret;
+
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
+ set_fs(oldfs);
+
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
+
+ return ret;
+}
+
long compat_sys_clock_getres(clockid_t which_clock,
struct compat_timespec __user *tp)
{
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
{
struct timex txc;
- int ret;
-
- memset(&txc, 0, sizeof(struct timex));
+ int err, ret;
- if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
- __get_user(txc.modes, &utp->modes) ||
- __get_user(txc.offset, &utp->offset) ||
- __get_user(txc.freq, &utp->freq) ||
- __get_user(txc.maxerror, &utp->maxerror) ||
- __get_user(txc.esterror, &utp->esterror) ||
- __get_user(txc.status, &utp->status) ||
- __get_user(txc.constant, &utp->constant) ||
- __get_user(txc.precision, &utp->precision) ||
- __get_user(txc.tolerance, &utp->tolerance) ||
- __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __get_user(txc.tick, &utp->tick) ||
- __get_user(txc.ppsfreq, &utp->ppsfreq) ||
- __get_user(txc.jitter, &utp->jitter) ||
- __get_user(txc.shift, &utp->shift) ||
- __get_user(txc.stabil, &utp->stabil) ||
- __get_user(txc.jitcnt, &utp->jitcnt) ||
- __get_user(txc.calcnt, &utp->calcnt) ||
- __get_user(txc.errcnt, &utp->errcnt) ||
- __get_user(txc.stbcnt, &utp->stbcnt))
- return -EFAULT;
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
ret = do_adjtimex(&txc);
- if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
- __put_user(txc.modes, &utp->modes) ||
- __put_user(txc.offset, &utp->offset) ||
- __put_user(txc.freq, &utp->freq) ||
- __put_user(txc.maxerror, &utp->maxerror) ||
- __put_user(txc.esterror, &utp->esterror) ||
- __put_user(txc.status, &utp->status) ||
- __put_user(txc.constant, &utp->constant) ||
- __put_user(txc.precision, &utp->precision) ||
- __put_user(txc.tolerance, &utp->tolerance) ||
- __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __put_user(txc.tick, &utp->tick) ||
- __put_user(txc.ppsfreq, &utp->ppsfreq) ||
- __put_user(txc.jitter, &utp->jitter) ||
- __put_user(txc.shift, &utp->shift) ||
- __put_user(txc.stabil, &utp->stabil) ||
- __put_user(txc.jitcnt, &utp->jitcnt) ||
- __put_user(txc.calcnt, &utp->calcnt) ||
- __put_user(txc.errcnt, &utp->errcnt) ||
- __put_user(txc.stbcnt, &utp->stbcnt) ||
- __put_user(txc.tai, &utp->tai))
- ret = -EFAULT;
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
return ret;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6..2343c132c5a 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
};
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd..bda41571538 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
return NULL;
}
-static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+ u32 uval, u32 newval)
{
- u32 curval;
+ int ret;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
pagefault_enable();
- return curval;
+ return ret;
}
static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
struct task_struct *task, int set_waiters)
{
int lock_taken, ret, ownerdied = 0;
- u32 uval, newval, curval;
+ u32 uval, newval, curval, vpid = task_pid_vnr(task);
retry:
ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
* (by doing a 0 -> TID atomic cmpxchg), while holding all
* the locks. It will most likely not succeed.
*/
- newval = task_pid_vnr(task);
+ newval = vpid;
if (set_waiters)
newval |= FUTEX_WAITERS;
- curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
- if (unlikely(curval == -EFAULT))
+ if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
return -EFAULT;
/*
* Detect deadlocks.
*/
- if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+ if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
return -EDEADLK;
/*
@@ -723,14 +722,12 @@ retry:
*/
if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
/* Keep the OWNER_DIED bit */
- newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+ newval = (curval & ~FUTEX_TID_MASK) | vpid;
ownerdied = 0;
lock_taken = 1;
}
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (unlikely(curval == -EFAULT))
+ if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT;
if (unlikely(curval != uval))
goto retry;
@@ -775,6 +772,24 @@ retry:
return ret;
}
+/**
+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+ * @q: The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be NULL and must be held by the caller.
+ */
+static void __unqueue_futex(struct futex_q *q)
+{
+ struct futex_hash_bucket *hb;
+
+ if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
+ || plist_node_empty(&q->list)))
+ return;
+
+ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+ plist_del(&q->list, &hb->chain);
+}
+
/*
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
*/
get_task_struct(p);
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
/*
* The waiting task can free the futex_q as soon as
* q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
ret = -EFAULT;
else if (curval != uval)
ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
- oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
-
- if (oldval == -EFAULT)
- return oldval;
+ if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
+ return -EFAULT;
if (oldval != uval)
return -EAGAIN;
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_del(&q->list, &hb1->chain);
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb2->lock;
-#endif
}
get_futex_key_refs(key2);
q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
get_futex_key_refs(key);
q->key = *key;
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
q->lock_ptr = &hb->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb->lock;
-#endif
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
prio = min(current->normal_prio, MAX_RT_PRIO);
plist_node_init(&q->list, prio);
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb->lock;
-#endif
plist_add(&q->list, &hb->chain);
q->task = current;
spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
spin_unlock(lock_ptr);
goto retry;
}
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
BUG_ON(q->pi_state);
@@ -1525,8 +1525,7 @@ retry:
static void unqueue_me_pi(struct futex_q *q)
__releases(q->lock_ptr)
{
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
BUG_ON(!q->pi_state);
free_pi_state(q->pi_state);
@@ -1556,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
/*
* We are here either because we stole the rtmutex from the
- * pending owner or we are the pending owner which failed to
- * get the rtmutex. We have to replace the pending owner TID
- * in the user space variable. This must be atomic as we have
- * to preserve the owner died bit here.
+ * previous highest priority waiter or we are the highest priority
+ * waiter but failed to get the rtmutex the first time.
+ * We have to replace the newowner TID in the user space variable.
+ * This must be atomic as we have to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the pi_state
* because we can fault here. Imagine swapped out pages or a fork
@@ -1578,9 +1577,7 @@ retry:
while (1) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
goto handle_fault;
if (curval == uval)
break;
@@ -1608,8 +1605,8 @@ retry:
/*
* To handle the page fault we need to drop the hash bucket
- * lock here. That gives the other task (either the pending
- * owner itself or the task which stole the rtmutex) the
+ * lock here. That gives the other task (either the highest priority
+ * waiter itself or the task which stole the rtmutex) the
* chance to try the fixup of the pi_state. So once we are
* back from handling the fault we need to check the pi_state
* after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
/*
* pi_state is incorrect, some other task did a lock steal and
* we returned due to timeout or signal without taking the
- * rt_mutex. Too late. We can access the rt_mutex_owner without
- * locking, as the other task is now blocked on the hash bucket
- * lock. Fix the state up.
+ * rt_mutex. Too late.
*/
+ raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+ if (!owner)
+ owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
+ raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
ret = fixup_pi_state_owner(uaddr, q, owner);
goto out;
}
/*
* Paranoia check. If we did not take the lock, then we should not be
- * the owner, nor the pending owner, of the rt_mutex.
+ * the owner of the rt_mutex.
*/
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1781,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
*
* The basic logical guarantee of a futex is that it blocks ONLY
* if cond(var) is known to be true at the time of blocking, for
- * any cond. If we queued after testing *uaddr, that would open
- * a race condition where we could block indefinitely with
+ * any cond. If we locked the hash-bucket after testing *uaddr, that
+ * would open a race condition where we could block indefinitely with
* cond(var) false, which would violate the guarantee.
*
- * A consequence is that futex_wait() can return zero and absorb
- * a wakeup when *uaddr != val on entry to the syscall. This is
- * rare, but normal.
+ * On the other hand, we insert q and release the hash-bucket only
+ * after testing *uaddr. This guarantees that futex_wait() will NOT
+ * absorb a wakeup if *uaddr does not match the desired values
+ * while the syscall executes.
*/
retry:
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- u32 uval;
struct plist_head *head;
union futex_key key = FUTEX_KEY_INIT;
+ u32 uval, vpid = task_pid_vnr(current);
int ret;
retry:
@@ -2057,7 +2057,7 @@ retry:
/*
* We release only a lock we actually own:
*/
- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
+ if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2072,14 @@ retry:
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- if (!(uval & FUTEX_OWNER_DIED))
- uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
-
-
- if (unlikely(uval == -EFAULT))
+ if (!(uval & FUTEX_OWNER_DIED) &&
+ cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
goto pi_faulted;
/*
* Rare case: we managed to release the lock atomically,
* no need to wake anyone else up:
*/
- if (unlikely(uval == task_pid_vnr(current)))
+ if (unlikely(uval == vpid))
goto out_unlock;
/*
@@ -2167,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* We were woken prior to requeue by a timeout or a signal.
* Unqueue the futex_q and determine which it was.
*/
- plist_del(&q->list, &q->list.plist);
+ plist_del(&q->list, &hb->chain);
/* Handle spurious wakeups gracefully */
ret = -EWOULDBLOCK;
@@ -2463,11 +2460,20 @@ retry:
* userspace.
*/
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
- nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
-
- if (nval == -EFAULT)
- return -1;
-
+ /*
+ * We are not holding a lock here, but we want to have
+ * the pagefault_disable/enable() protection because
+ * we want to handle the fault gracefully. If the
+ * access fails we try to fault in the futex with R/W
+ * verification via get_user_pages. get_user() above
+ * does not guarantee R/W access. If that fails we
+ * give up and leave the futex locked.
+ */
+ if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+ if (fault_in_user_writeable(uaddr))
+ return -1;
+ goto retry;
+ }
if (nval != uval)
goto retry;
@@ -2678,8 +2684,7 @@ static int __init futex_init(void)
* implementation, the non-functional ones will return
* -ENOSYS.
*/
- curval = cmpxchg_futex_value_locked(NULL, 0, 0);
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
futex_cmpxchg_enabled = 1;
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c04861..9017478c5d4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -53,11 +53,10 @@
/*
* The timer bases:
*
- * Note: If we want to add new timer bases, we have to skip the two
- * clock ids captured by the cpu-timers. We do this by holding empty
- * entries rather than doing math adjustment of the clock ids.
- * This ensures that we capture erroneous accesses to these clock ids
- * rather than moving them into the range of valid clock id's.
+ * There are more clockids then hrtimer bases. Thus, we index
+ * into the timer bases by the hrtimer_base_type enum. When trying
+ * to reach a base using a clockid, hrtimer_clockid_to_base()
+ * is used to convert from clockid to the proper hrtimer_base_type.
*/
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
@@ -74,30 +73,39 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.get_time = &ktime_get,
.resolution = KTIME_LOW_RES,
},
+ {
+ .index = CLOCK_BOOTTIME,
+ .get_time = &ktime_get_boottime,
+ .resolution = KTIME_LOW_RES,
+ },
}
};
+static int hrtimer_clock_to_base_table[MAX_CLOCKS];
+
+static inline int hrtimer_clockid_to_base(clockid_t clock_id)
+{
+ return hrtimer_clock_to_base_table[clock_id];
+}
+
+
/*
* Get the coarse grained time at the softirq based on xtime and
* wall_to_monotonic.
*/
static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
{
- ktime_t xtim, tomono;
- struct timespec xts, tom;
- unsigned long seq;
+ ktime_t xtim, mono, boot;
+ struct timespec xts, tom, slp;
- do {
- seq = read_seqbegin(&xtime_lock);
- xts = __current_kernel_time();
- tom = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
xtim = timespec_to_ktime(xts);
- tomono = timespec_to_ktime(tom);
- base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
- base->clock_base[CLOCK_MONOTONIC].softirq_time =
- ktime_add(xtim, tomono);
+ mono = ktime_add(xtim, timespec_to_ktime(tom));
+ boot = ktime_add(mono, timespec_to_ktime(slp));
+ base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
+ base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
+ base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
}
/*
@@ -184,10 +192,11 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
struct hrtimer_cpu_base *new_cpu_base;
int this_cpu = smp_processor_id();
int cpu = hrtimer_get_target(this_cpu, pinned);
+ int basenum = hrtimer_clockid_to_base(base->index);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
- new_base = &new_cpu_base->clock_base[base->index];
+ new_base = &new_cpu_base->clock_base[basenum];
if (base != new_base) {
/*
@@ -334,6 +343,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
static struct debug_obj_descr hrtimer_debug_descr;
+static void *hrtimer_debug_hint(void *addr)
+{
+ return ((struct hrtimer *) addr)->function;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -393,6 +407,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr hrtimer_debug_descr = {
.name = "hrtimer",
+ .debug_hint = hrtimer_debug_hint,
.fixup_init = hrtimer_fixup_init,
.fixup_activate = hrtimer_fixup_activate,
.fixup_free = hrtimer_fixup_free,
@@ -611,24 +626,23 @@ static int hrtimer_reprogram(struct hrtimer *timer,
static void retrigger_next_event(void *arg)
{
struct hrtimer_cpu_base *base;
- struct timespec realtime_offset, wtm;
- unsigned long seq;
+ struct timespec realtime_offset, wtm, sleep;
if (!hrtimer_hres_active())
return;
- do {
- seq = read_seqbegin(&xtime_lock);
- wtm = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
+ &sleep);
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
base = &__get_cpu_var(hrtimer_bases);
/* Adjust CLOCK_REALTIME offset */
raw_spin_lock(&base->lock);
- base->clock_base[CLOCK_REALTIME].offset =
+ base->clock_base[HRTIMER_BASE_REALTIME].offset =
timespec_to_ktime(realtime_offset);
+ base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
+ timespec_to_ktime(sleep);
hrtimer_force_reprogram(base, 0);
raw_spin_unlock(&base->lock);
@@ -673,14 +687,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
}
/*
- * Initialize the high resolution related parts of a hrtimer
- */
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
-{
-}
-
-
-/*
* When High resolution timers are active, try to reprogram. Note, that in case
* the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
* check happens. The timer gets enqueued into the rbtree. The reprogramming
@@ -725,8 +731,9 @@ static int hrtimer_switch_to_hres(void)
return 0;
}
base->hres_active = 1;
- base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
- base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
tick_setup_sched_timer();
@@ -750,7 +757,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1121,6 +1127,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
struct hrtimer_cpu_base *cpu_base;
+ int base;
memset(timer, 0, sizeof(struct hrtimer));
@@ -1129,8 +1136,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
clock_id = CLOCK_MONOTONIC;
- timer->base = &cpu_base->clock_base[clock_id];
- hrtimer_init_timer_hres(timer);
+ base = hrtimer_clockid_to_base(clock_id);
+ timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
#ifdef CONFIG_TIMER_STATS
@@ -1165,9 +1172,10 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
{
struct hrtimer_cpu_base *cpu_base;
+ int base = hrtimer_clockid_to_base(which_clock);
cpu_base = &__raw_get_cpu_var(hrtimer_bases);
- *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
+ *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
return 0;
}
@@ -1714,6 +1722,10 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
void __init hrtimers_init(void)
{
+ hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
+ hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
+ hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
+
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8e42fec7686..09bef82d74c 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,5 +1,6 @@
+# Select this to activate the generic irq options below
config HAVE_GENERIC_HARDIRQS
- def_bool n
+ bool
if HAVE_GENERIC_HARDIRQS
menu "IRQ subsystem"
@@ -11,26 +12,44 @@ config GENERIC_HARDIRQS
# Select this to disable the deprecated stuff
config GENERIC_HARDIRQS_NO_DEPRECATED
- def_bool n
+ bool
+
+config GENERIC_HARDIRQS_NO_COMPAT
+ bool
# Options selectable by the architecture code
+
+# Make sparse irq Kconfig switch below available
config HAVE_SPARSE_IRQ
- def_bool n
+ bool
+# Enable the generic irq autoprobe mechanism
config GENERIC_IRQ_PROBE
- def_bool n
+ bool
+
+# Use the generic /proc/interrupts implementation
+config GENERIC_IRQ_SHOW
+ bool
+# Support for delayed migration from interrupt context
config GENERIC_PENDING_IRQ
- def_bool n
+ bool
+# Alpha specific irq affinity mechanism
config AUTO_IRQ_AFFINITY
- def_bool n
-
-config IRQ_PER_CPU
- def_bool n
+ bool
+# Tasklet based software resend for pending interrupts on enable_irq()
config HARDIRQS_SW_RESEND
- def_bool n
+ bool
+
+# Preflow handler support for fasteoi (sparc64)
+config IRQ_PREFLOW_FASTEOI
+ bool
+
+# Support forced irq threading
+config IRQ_FORCED_THREADING
+ bool
config SPARSE_IRQ
bool "Support sparse irq numbering"
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 505798f86c3..394784c5706 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -17,7 +17,7 @@
/*
* Autodetection depends on the fact that any interrupt that
* comes in on to an unassigned handler will get stuck with
- * "IRQ_WAITING" cleared and the interrupt disabled.
+ * "IRQS_WAITING" cleared and the interrupt disabled.
*/
static DEFINE_MUTEX(probing_active);
@@ -32,7 +32,6 @@ unsigned long probe_irq_on(void)
{
struct irq_desc *desc;
unsigned long mask = 0;
- unsigned int status;
int i;
/*
@@ -46,13 +45,7 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc_reverse(i, desc) {
raw_spin_lock_irq(&desc->lock);
- if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
- /*
- * An old-style architecture might still have
- * the handle_bad_irq handler there:
- */
- compat_irq_chip_set_default_handler(desc);
-
+ if (!desc->action && irq_settings_can_probe(desc)) {
/*
* Some chips need to know about probing in
* progress:
@@ -60,7 +53,7 @@ unsigned long probe_irq_on(void)
if (desc->irq_data.chip->irq_set_type)
desc->irq_data.chip->irq_set_type(&desc->irq_data,
IRQ_TYPE_PROBE);
- desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_startup(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -75,10 +68,12 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc_reverse(i, desc) {
raw_spin_lock_irq(&desc->lock);
- if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
- desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
- if (desc->irq_data.chip->irq_startup(&desc->irq_data))
- desc->status |= IRQ_PENDING;
+ if (!desc->action && irq_settings_can_probe(desc)) {
+ desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
+ if (irq_startup(desc)) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ }
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -93,13 +88,12 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
- if (status & IRQ_AUTODETECT) {
+ if (desc->istate & IRQS_AUTODETECT) {
/* It triggered already - consider it spurious. */
- if (!(status & IRQ_WAITING)) {
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ if (!(desc->istate & IRQS_WAITING)) {
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -125,20 +119,18 @@ EXPORT_SYMBOL(probe_irq_on);
*/
unsigned int probe_irq_mask(unsigned long val)
{
- unsigned int status, mask = 0;
+ unsigned int mask = 0;
struct irq_desc *desc;
int i;
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (i < 16 && !(status & IRQ_WAITING))
+ if (desc->istate & IRQS_AUTODETECT) {
+ if (i < 16 && !(desc->istate & IRQS_WAITING))
mask |= 1 << i;
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,20 +161,18 @@ int probe_irq_off(unsigned long val)
{
int i, irq_found = 0, nr_of_irqs = 0;
struct irq_desc *desc;
- unsigned int status;
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
- if (status & IRQ_AUTODETECT) {
- if (!(status & IRQ_WAITING)) {
+ if (desc->istate & IRQS_AUTODETECT) {
+ if (!(desc->istate & IRQS_WAITING)) {
if (!nr_of_irqs)
irq_found = i;
nr_of_irqs++;
}
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index baa5c4acad8..c9c0601f061 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -19,140 +19,110 @@
#include "internals.h"
/**
- * set_irq_chip - set the irq chip for an irq
+ * irq_set_chip - set the irq chip for an irq
* @irq: irq number
* @chip: pointer to irq chip description structure
*/
-int set_irq_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, struct irq_chip *chip)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
if (!chip)
chip = &no_irq_chip;
- raw_spin_lock_irqsave(&desc->lock, flags);
irq_chip_set_defaults(chip);
desc->irq_data.chip = chip;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_chip);
+EXPORT_SYMBOL(irq_set_chip);
/**
- * set_irq_type - set the irq trigger type for an irq
+ * irq_set_type - set the irq trigger type for an irq
* @irq: irq number
* @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
*/
-int set_irq_type(unsigned int irq, unsigned int type)
+int irq_set_irq_type(unsigned int irq, unsigned int type)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
- int ret = -ENXIO;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
+ int ret = 0;
- if (!desc) {
- printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
- return -ENODEV;
- }
+ if (!desc)
+ return -EINVAL;
type &= IRQ_TYPE_SENSE_MASK;
- if (type == IRQ_TYPE_NONE)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- ret = __irq_set_trigger(desc, irq, type);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (type != IRQ_TYPE_NONE)
+ ret = __irq_set_trigger(desc, irq, type);
+ irq_put_desc_busunlock(desc, flags);
return ret;
}
-EXPORT_SYMBOL(set_irq_type);
+EXPORT_SYMBOL(irq_set_irq_type);
/**
- * set_irq_data - set irq type data for an irq
+ * irq_set_handler_data - set irq handler data for an irq
* @irq: Interrupt number
* @data: Pointer to interrupt specific data
*
* Set the hardware irq controller data for an irq
*/
-int set_irq_data(unsigned int irq, void *data)
+int irq_set_handler_data(unsigned int irq, void *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install controller data for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.handler_data = data;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_data);
+EXPORT_SYMBOL(irq_set_handler_data);
/**
- * set_irq_msi - set MSI descriptor data for an irq
+ * irq_set_msi_desc - set MSI descriptor data for an irq
* @irq: Interrupt number
* @entry: Pointer to MSI descriptor data
*
* Set the MSI descriptor entry for an irq
*/
-int set_irq_msi(unsigned int irq, struct msi_desc *entry)
+int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install msi data for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.msi_desc = entry;
if (entry)
entry->irq = irq;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
return 0;
}
/**
- * set_irq_chip_data - set irq chip data for an irq
+ * irq_set_chip_data - set irq chip data for an irq
* @irq: Interrupt number
* @data: Pointer to chip specific data
*
* Set the hardware irq chip data for an irq
*/
-int set_irq_chip_data(unsigned int irq, void *data)
+int irq_set_chip_data(unsigned int irq, void *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install chip data for IRQ%d\n", irq);
- return -EINVAL;
- }
-
- if (!desc->irq_data.chip) {
- printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.chip_data = data;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_chip_data);
+EXPORT_SYMBOL(irq_set_chip_data);
struct irq_data *irq_get_irq_data(unsigned int irq)
{
@@ -162,72 +132,75 @@ struct irq_data *irq_get_irq_data(unsigned int irq)
}
EXPORT_SYMBOL_GPL(irq_get_irq_data);
-/**
- * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq
- *
- * @irq: Interrupt number
- * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag
- *
- * The IRQ_NESTED_THREAD flag indicates that on
- * request_threaded_irq() no separate interrupt thread should be
- * created for the irq as the handler are called nested in the
- * context of a demultiplexing interrupt handler thread.
- */
-void set_irq_nested_thread(unsigned int irq, int nest)
+static void irq_state_clr_disabled(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
-
- if (!desc)
- return;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- if (nest)
- desc->status |= IRQ_NESTED_THREAD;
- else
- desc->status &= ~IRQ_NESTED_THREAD;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ desc->istate &= ~IRQS_DISABLED;
+ irq_compat_clr_disabled(desc);
}
-EXPORT_SYMBOL_GPL(set_irq_nested_thread);
-/*
- * default enable function
- */
-static void default_enable(struct irq_data *data)
+static void irq_state_set_disabled(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ desc->istate |= IRQS_DISABLED;
+ irq_compat_set_disabled(desc);
+}
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
- desc->status &= ~IRQ_MASKED;
+static void irq_state_clr_masked(struct irq_desc *desc)
+{
+ desc->istate &= ~IRQS_MASKED;
+ irq_compat_clr_masked(desc);
}
-/*
- * default disable function
- */
-static void default_disable(struct irq_data *data)
+static void irq_state_set_masked(struct irq_desc *desc)
{
+ desc->istate |= IRQS_MASKED;
+ irq_compat_set_masked(desc);
}
-/*
- * default startup function
- */
-static unsigned int default_startup(struct irq_data *data)
+int irq_startup(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ irq_state_clr_disabled(desc);
+ desc->depth = 0;
+
+ if (desc->irq_data.chip->irq_startup) {
+ int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_state_clr_masked(desc);
+ return ret;
+ }
- desc->irq_data.chip->irq_enable(data);
+ irq_enable(desc);
return 0;
}
-/*
- * default shutdown function
- */
-static void default_shutdown(struct irq_data *data)
+void irq_shutdown(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ irq_state_set_disabled(desc);
+ desc->depth = 1;
+ if (desc->irq_data.chip->irq_shutdown)
+ desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ if (desc->irq_data.chip->irq_disable)
+ desc->irq_data.chip->irq_disable(&desc->irq_data);
+ else
+ desc->irq_data.chip->irq_mask(&desc->irq_data);
+ irq_state_set_masked(desc);
+}
- desc->irq_data.chip->irq_mask(&desc->irq_data);
- desc->status |= IRQ_MASKED;
+void irq_enable(struct irq_desc *desc)
+{
+ irq_state_clr_disabled(desc);
+ if (desc->irq_data.chip->irq_enable)
+ desc->irq_data.chip->irq_enable(&desc->irq_data);
+ else
+ desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ irq_state_clr_masked(desc);
+}
+
+void irq_disable(struct irq_desc *desc)
+{
+ irq_state_set_disabled(desc);
+ if (desc->irq_data.chip->irq_disable) {
+ desc->irq_data.chip->irq_disable(&desc->irq_data);
+ irq_state_set_masked(desc);
+ }
}
#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
@@ -315,10 +288,6 @@ static void compat_bus_sync_unlock(struct irq_data *data)
void irq_chip_set_defaults(struct irq_chip *chip)
{
#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
- /*
- * Compat fixup functions need to be before we set the
- * defaults for enable/disable/startup/shutdown
- */
if (chip->enable)
chip->irq_enable = compat_irq_enable;
if (chip->disable)
@@ -327,33 +296,8 @@ void irq_chip_set_defaults(struct irq_chip *chip)
chip->irq_shutdown = compat_irq_shutdown;
if (chip->startup)
chip->irq_startup = compat_irq_startup;
-#endif
- /*
- * The real defaults
- */
- if (!chip->irq_enable)
- chip->irq_enable = default_enable;
- if (!chip->irq_disable)
- chip->irq_disable = default_disable;
- if (!chip->irq_startup)
- chip->irq_startup = default_startup;
- /*
- * We use chip->irq_disable, when the user provided its own. When
- * we have default_disable set for chip->irq_disable, then we need
- * to use default_shutdown, otherwise the irq line is not
- * disabled on free_irq():
- */
- if (!chip->irq_shutdown)
- chip->irq_shutdown = chip->irq_disable != default_disable ?
- chip->irq_disable : default_shutdown;
-
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
if (!chip->end)
chip->end = dummy_irq_chip.end;
-
- /*
- * Now fix up the remaining compat handlers
- */
if (chip->bus_lock)
chip->irq_bus_lock = compat_bus_lock;
if (chip->bus_sync_unlock)
@@ -388,22 +332,22 @@ static inline void mask_ack_irq(struct irq_desc *desc)
if (desc->irq_data.chip->irq_ack)
desc->irq_data.chip->irq_ack(&desc->irq_data);
}
- desc->status |= IRQ_MASKED;
+ irq_state_set_masked(desc);
}
-static inline void mask_irq(struct irq_desc *desc)
+void mask_irq(struct irq_desc *desc)
{
if (desc->irq_data.chip->irq_mask) {
desc->irq_data.chip->irq_mask(&desc->irq_data);
- desc->status |= IRQ_MASKED;
+ irq_state_set_masked(desc);
}
}
-static inline void unmask_irq(struct irq_desc *desc)
+void unmask_irq(struct irq_desc *desc)
{
if (desc->irq_data.chip->irq_unmask) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
- desc->status &= ~IRQ_MASKED;
+ irq_state_clr_masked(desc);
}
}
@@ -428,10 +372,11 @@ void handle_nested_irq(unsigned int irq)
kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
+ irq_compat_set_progress(desc);
+ desc->istate |= IRQS_INPROGRESS;
raw_spin_unlock_irq(&desc->lock);
action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -439,13 +384,21 @@ void handle_nested_irq(unsigned int irq)
note_interrupt(irq, desc, action_ret);
raw_spin_lock_irq(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
+ desc->istate &= ~IRQS_INPROGRESS;
+ irq_compat_clr_progress(desc);
out_unlock:
raw_spin_unlock_irq(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_nested_irq);
+static bool irq_check_poll(struct irq_desc *desc)
+{
+ if (!(desc->istate & IRQS_POLL_INPROGRESS))
+ return false;
+ return irq_wait_for_poll(desc);
+}
+
/**
* handle_simple_irq - Simple and software-decoded IRQs.
* @irq: the interrupt number
@@ -461,29 +414,20 @@ EXPORT_SYMBOL_GPL(handle_nested_irq);
void
handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out_unlock;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out_unlock;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
- raw_spin_unlock(&desc->lock);
+ handle_irq_event(desc);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
-
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -501,42 +445,42 @@ out_unlock:
void
handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
mask_ack_irq(desc);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out_unlock;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out_unlock;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If its disabled or no action available
* keep it masked and get out of here
*/
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
- raw_spin_unlock(&desc->lock);
-
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ handle_irq_event(desc);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
-
- if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
+ if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
unmask_irq(desc);
out_unlock:
raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+static inline void preflow_handler(struct irq_desc *desc)
+{
+ if (desc->preflow_handler)
+ desc->preflow_handler(&desc->irq_data);
+}
+#else
+static inline void preflow_handler(struct irq_desc *desc) { }
+#endif
+
/**
* handle_fasteoi_irq - irq handler for transparent controllers
* @irq: the interrupt number
@@ -550,42 +494,41 @@ EXPORT_SYMBOL_GPL(handle_level_irq);
void
handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out;
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If its disabled or no action available
* then mask it and get out of here:
*/
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
- desc->status |= IRQ_PENDING;
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
mask_irq(desc);
goto out;
}
- desc->status |= IRQ_INPROGRESS;
- desc->status &= ~IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
+ if (desc->istate & IRQS_ONESHOT)
+ mask_irq(desc);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ preflow_handler(desc);
+ handle_irq_event(desc);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
-out:
+out_eoi:
desc->irq_data.chip->irq_eoi(&desc->irq_data);
-
+out_unlock:
raw_spin_unlock(&desc->lock);
+ return;
+out:
+ if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED))
+ goto out_eoi;
+ goto out_unlock;
}
/**
@@ -609,32 +552,28 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
/*
* If we're currently running this IRQ, or its disabled,
* we shouldn't process the IRQ. Mark it pending, handle
* the necessary masking and go out
*/
- if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
- !desc->action)) {
- desc->status |= (IRQ_PENDING | IRQ_MASKED);
- mask_ack_irq(desc);
- goto out_unlock;
+ if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
+ !desc->action))) {
+ if (!irq_check_poll(desc)) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ mask_ack_irq(desc);
+ goto out_unlock;
+ }
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->irq_data.chip->irq_ack(&desc->irq_data);
- /* Mark the IRQ currently in progress.*/
- desc->status |= IRQ_INPROGRESS;
-
do {
- struct irqaction *action = desc->action;
- irqreturn_t action_ret;
-
- if (unlikely(!action)) {
+ if (unlikely(!desc->action)) {
mask_irq(desc);
goto out_unlock;
}
@@ -644,22 +583,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
* one, we could have masked the irq.
* Renable it, if it was not disabled in meantime.
*/
- if (unlikely((desc->status &
- (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
- (IRQ_PENDING | IRQ_MASKED))) {
- unmask_irq(desc);
+ if (unlikely(desc->istate & IRQS_PENDING)) {
+ if (!(desc->istate & IRQS_DISABLED) &&
+ (desc->istate & IRQS_MASKED))
+ unmask_irq(desc);
}
- desc->status &= ~IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
- raw_spin_lock(&desc->lock);
+ handle_irq_event(desc);
- } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
+ } while ((desc->istate & IRQS_PENDING) &&
+ !(desc->istate & IRQS_DISABLED));
- desc->status &= ~IRQ_INPROGRESS;
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -674,103 +608,84 @@ out_unlock:
void
handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
{
- irqreturn_t action_ret;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
kstat_incr_irqs_this_cpu(irq, desc);
- if (desc->irq_data.chip->irq_ack)
- desc->irq_data.chip->irq_ack(&desc->irq_data);
+ if (chip->irq_ack)
+ chip->irq_ack(&desc->irq_data);
- action_ret = handle_IRQ_event(irq, desc->action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ handle_irq_event_percpu(desc, desc->action);
- if (desc->irq_data.chip->irq_eoi)
- desc->irq_data.chip->irq_eoi(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
}
void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install type control for IRQ%d\n", irq);
+ if (!desc)
return;
- }
- if (!handle)
+ if (!handle) {
handle = handle_bad_irq;
- else if (desc->irq_data.chip == &no_irq_chip) {
- printk(KERN_WARNING "Trying to install %sinterrupt handler "
- "for IRQ%d\n", is_chained ? "chained " : "", irq);
- /*
- * Some ARM implementations install a handler for really dumb
- * interrupt hardware without setting an irq_chip. This worked
- * with the ARM no_irq_chip but the check in setup_irq would
- * prevent us to setup the interrupt at all. Switch it to
- * dummy_irq_chip for easy transition.
- */
- desc->irq_data.chip = &dummy_irq_chip;
+ } else {
+ if (WARN_ON(desc->irq_data.chip == &no_irq_chip))
+ goto out;
}
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
-
/* Uninstall? */
if (handle == handle_bad_irq) {
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
- desc->status |= IRQ_DISABLED;
+ irq_compat_set_disabled(desc);
+ desc->istate |= IRQS_DISABLED;
desc->depth = 1;
}
desc->handle_irq = handle;
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
- desc->status &= ~IRQ_DISABLED;
- desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
- desc->depth = 0;
- desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_settings_set_noprobe(desc);
+ irq_settings_set_norequest(desc);
+ irq_startup(desc);
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
-}
-EXPORT_SYMBOL_GPL(__set_irq_handler);
-
-void
-set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
- irq_flow_handler_t handle)
-{
- set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0, NULL);
+out:
+ irq_put_desc_busunlock(desc, flags);
}
+EXPORT_SYMBOL_GPL(__irq_set_handler);
void
-set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle, const char *name)
{
- set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0, name);
+ irq_set_chip(irq, chip);
+ __irq_set_handler(irq, handle, 0, name);
}
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
if (!desc)
return;
+ irq_settings_clr_and_set(desc, clr, set);
+
+ irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
+ IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
+ if (irq_settings_has_no_balance_set(desc))
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ if (irq_settings_is_per_cpu(desc))
+ irqd_set(&desc->irq_data, IRQD_PER_CPU);
+ if (irq_settings_can_move_pcntxt(desc))
+ irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
- /* Sanitize flags */
- set &= IRQF_MODIFY_MASK;
- clr &= IRQF_MODIFY_MASK;
+ irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
- raw_spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~clr;
- desc->status |= set;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
}
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
new file mode 100644
index 00000000000..6bbaf66aca8
--- /dev/null
+++ b/kernel/irq/compat.h
@@ -0,0 +1,72 @@
+/*
+ * Compat layer for transition period
+ */
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+static inline void irq_compat_set_progress(struct irq_desc *desc)
+{
+ desc->status |= IRQ_INPROGRESS;
+}
+
+static inline void irq_compat_clr_progress(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_INPROGRESS;
+}
+static inline void irq_compat_set_disabled(struct irq_desc *desc)
+{
+ desc->status |= IRQ_DISABLED;
+}
+static inline void irq_compat_clr_disabled(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_DISABLED;
+}
+static inline void irq_compat_set_pending(struct irq_desc *desc)
+{
+ desc->status |= IRQ_PENDING;
+}
+
+static inline void irq_compat_clr_pending(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_PENDING;
+}
+static inline void irq_compat_set_masked(struct irq_desc *desc)
+{
+ desc->status |= IRQ_MASKED;
+}
+
+static inline void irq_compat_clr_masked(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_MASKED;
+}
+static inline void irq_compat_set_move_pending(struct irq_desc *desc)
+{
+ desc->status |= IRQ_MOVE_PENDING;
+}
+
+static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_MOVE_PENDING;
+}
+static inline void irq_compat_set_affinity(struct irq_desc *desc)
+{
+ desc->status |= IRQ_AFFINITY_SET;
+}
+
+static inline void irq_compat_clr_affinity(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_AFFINITY_SET;
+}
+#else
+static inline void irq_compat_set_progress(struct irq_desc *desc) { }
+static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
+static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
+static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
+static inline void irq_compat_set_pending(struct irq_desc *desc) { }
+static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
+static inline void irq_compat_set_masked(struct irq_desc *desc) { }
+static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
+static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
+static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
+static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
+static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
+#endif
+
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
new file mode 100644
index 00000000000..d1a33b7fa61
--- /dev/null
+++ b/kernel/irq/debug.h
@@ -0,0 +1,40 @@
+/*
+ * Debugging printout:
+ */
+
+#include <linux/kallsyms.h>
+
+#define P(f) if (desc->status & f) printk("%14s set\n", #f)
+#define PS(f) if (desc->istate & f) printk("%14s set\n", #f)
+
+static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+ printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
+ irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
+ printk("->handle_irq(): %p, ", desc->handle_irq);
+ print_symbol("%s\n", (unsigned long)desc->handle_irq);
+ printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
+ print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
+ printk("->action(): %p\n", desc->action);
+ if (desc->action) {
+ printk("->action->handler(): %p, ", desc->action->handler);
+ print_symbol("%s\n", (unsigned long)desc->action->handler);
+ }
+
+ P(IRQ_LEVEL);
+ P(IRQ_PER_CPU);
+ P(IRQ_NOPROBE);
+ P(IRQ_NOREQUEST);
+ P(IRQ_NOAUTOEN);
+
+ PS(IRQS_AUTODETECT);
+ PS(IRQS_INPROGRESS);
+ PS(IRQS_REPLAY);
+ PS(IRQS_WAITING);
+ PS(IRQS_DISABLED);
+ PS(IRQS_PENDING);
+ PS(IRQS_MASKED);
+}
+
+#undef P
+#undef PS
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3540a719012..517561fc731 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -51,30 +51,92 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
"but no thread function available.", irq, action->name);
}
-/**
- * handle_IRQ_event - irq action chain handler
- * @irq: the interrupt number
- * @action: the interrupt action chain for this irq
- *
- * Handles the action chain of an irq event
- */
-irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
+static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
+{
+ /*
+ * Wake up the handler thread for this action. In case the
+ * thread crashed and was killed we just pretend that we
+ * handled the interrupt. The hardirq handler has disabled the
+ * device interrupt, so no irq storm is lurking. If the
+ * RUNTHREAD bit is already set, nothing to do.
+ */
+ if (test_bit(IRQTF_DIED, &action->thread_flags) ||
+ test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ return;
+
+ /*
+ * It's safe to OR the mask lockless here. We have only two
+ * places which write to threads_oneshot: This code and the
+ * irq thread.
+ *
+ * This code is the hard irq context and can never run on two
+ * cpus in parallel. If it ever does we have more serious
+ * problems than this bitmask.
+ *
+ * The irq threads of this irq which clear their "running" bit
+ * in threads_oneshot are serialized via desc->lock against
+ * each other and they are serialized against this code by
+ * IRQS_INPROGRESS.
+ *
+ * Hard irq handler:
+ *
+ * spin_lock(desc->lock);
+ * desc->state |= IRQS_INPROGRESS;
+ * spin_unlock(desc->lock);
+ * set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ * desc->threads_oneshot |= mask;
+ * spin_lock(desc->lock);
+ * desc->state &= ~IRQS_INPROGRESS;
+ * spin_unlock(desc->lock);
+ *
+ * irq thread:
+ *
+ * again:
+ * spin_lock(desc->lock);
+ * if (desc->state & IRQS_INPROGRESS) {
+ * spin_unlock(desc->lock);
+ * while(desc->state & IRQS_INPROGRESS)
+ * cpu_relax();
+ * goto again;
+ * }
+ * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ * desc->threads_oneshot &= ~mask;
+ * spin_unlock(desc->lock);
+ *
+ * So either the thread waits for us to clear IRQS_INPROGRESS
+ * or we are waiting in the flow handler for desc->lock to be
+ * released before we reach this point. The thread also checks
+ * IRQTF_RUNTHREAD under desc->lock. If set it leaves
+ * threads_oneshot untouched and runs the thread another time.
+ */
+ desc->threads_oneshot |= action->thread_mask;
+ wake_up_process(action->thread);
+}
+
+irqreturn_t
+handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
{
- irqreturn_t ret, retval = IRQ_NONE;
- unsigned int status = 0;
+ irqreturn_t retval = IRQ_NONE;
+ unsigned int random = 0, irq = desc->irq_data.irq;
do {
+ irqreturn_t res;
+
trace_irq_handler_entry(irq, action);
- ret = action->handler(irq, action->dev_id);
- trace_irq_handler_exit(irq, action, ret);
+ res = action->handler(irq, action->dev_id);
+ trace_irq_handler_exit(irq, action, res);
- switch (ret) {
+ if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
+ irq, action->handler))
+ local_irq_disable();
+
+ switch (res) {
case IRQ_WAKE_THREAD:
/*
* Set result to handled so the spurious check
* does not trigger.
*/
- ret = IRQ_HANDLED;
+ res = IRQ_HANDLED;
/*
* Catch drivers which return WAKE_THREAD but
@@ -85,36 +147,56 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
break;
}
- /*
- * Wake up the handler thread for this
- * action. In case the thread crashed and was
- * killed we just pretend that we handled the
- * interrupt. The hardirq handler above has
- * disabled the device interrupt, so no irq
- * storm is lurking.
- */
- if (likely(!test_bit(IRQTF_DIED,
- &action->thread_flags))) {
- set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
- wake_up_process(action->thread);
- }
+ irq_wake_thread(desc, action);
/* Fall through to add to randomness */
case IRQ_HANDLED:
- status |= action->flags;
+ random |= action->flags;
break;
default:
break;
}
- retval |= ret;
+ retval |= res;
action = action->next;
} while (action);
- if (status & IRQF_SAMPLE_RANDOM)
+ if (random & IRQF_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
- local_irq_disable();
+ if (!noirqdebug)
+ note_interrupt(irq, desc, retval);
return retval;
}
+
+irqreturn_t handle_irq_event(struct irq_desc *desc)
+{
+ struct irqaction *action = desc->action;
+ irqreturn_t ret;
+
+ irq_compat_clr_pending(desc);
+ desc->istate &= ~IRQS_PENDING;
+ irq_compat_set_progress(desc);
+ desc->istate |= IRQS_INPROGRESS;
+ raw_spin_unlock(&desc->lock);
+
+ ret = handle_irq_event_percpu(desc, action);
+
+ raw_spin_lock(&desc->lock);
+ desc->istate &= ~IRQS_INPROGRESS;
+ irq_compat_clr_progress(desc);
+ return ret;
+}
+
+/**
+ * handle_IRQ_event - irq action chain handler
+ * @irq: the interrupt number
+ * @action: the interrupt action chain for this irq
+ *
+ * Handles the action chain of an irq event
+ */
+irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
+{
+ return handle_irq_event_percpu(irq_to_desc(irq), action);
+}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 99c3bc8a6fb..6c6ec9a4902 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -1,5 +1,9 @@
/*
* IRQ subsystem internal functions and variables:
+ *
+ * Do not ever include this file from anything else than
+ * kernel/irq/. Do not even think about using any information outside
+ * of this file for your non core code.
*/
#include <linux/irqdesc.h>
@@ -9,25 +13,89 @@
# define IRQ_BITMAP_BITS NR_IRQS
#endif
+#define istate core_internal_state__do_not_mess_with_it
+
+#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+# define status status_use_accessors
+#endif
+
extern int noirqdebug;
+/*
+ * Bits used by threaded handlers:
+ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ * IRQTF_DIED - handler thread died
+ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ * IRQTF_AFFINITY - irq thread is requested to adjust affinity
+ * IRQTF_FORCED_THREAD - irq action is force threaded
+ */
+enum {
+ IRQTF_RUNTHREAD,
+ IRQTF_DIED,
+ IRQTF_WARNED,
+ IRQTF_AFFINITY,
+ IRQTF_FORCED_THREAD,
+};
+
+/*
+ * Bit masks for desc->state
+ *
+ * IRQS_AUTODETECT - autodetection in progress
+ * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
+ * detection
+ * IRQS_POLL_INPROGRESS - polling in progress
+ * IRQS_INPROGRESS - Interrupt in progress
+ * IRQS_ONESHOT - irq is not unmasked in primary handler
+ * IRQS_REPLAY - irq is replayed
+ * IRQS_WAITING - irq is waiting
+ * IRQS_DISABLED - irq is disabled
+ * IRQS_PENDING - irq is pending and replayed later
+ * IRQS_MASKED - irq is masked
+ * IRQS_SUSPENDED - irq is suspended
+ */
+enum {
+ IRQS_AUTODETECT = 0x00000001,
+ IRQS_SPURIOUS_DISABLED = 0x00000002,
+ IRQS_POLL_INPROGRESS = 0x00000008,
+ IRQS_INPROGRESS = 0x00000010,
+ IRQS_ONESHOT = 0x00000020,
+ IRQS_REPLAY = 0x00000040,
+ IRQS_WAITING = 0x00000080,
+ IRQS_DISABLED = 0x00000100,
+ IRQS_PENDING = 0x00000200,
+ IRQS_MASKED = 0x00000400,
+ IRQS_SUSPENDED = 0x00000800,
+};
+
+#include "compat.h"
+#include "debug.h"
+#include "settings.h"
+
#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
/* Set default functions for irq_chip structures: */
extern void irq_chip_set_defaults(struct irq_chip *chip);
-/* Set default handler: */
-extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
-
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern int irq_startup(struct irq_desc *desc);
+extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_enable(struct irq_desc *desc);
+extern void irq_disable(struct irq_desc *desc);
+extern void mask_irq(struct irq_desc *desc);
+extern void unmask_irq(struct irq_desc *desc);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
+irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action);
+irqreturn_t handle_irq_event(struct irq_desc *desc);
+
/* Resending of interrupts :*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq);
+bool irq_wait_for_poll(struct irq_desc *desc);
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
@@ -43,20 +111,10 @@ static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
-extern int irq_select_affinity_usr(unsigned int irq);
+extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
extern void irq_set_thread_affinity(struct irq_desc *desc);
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-static inline void irq_end(unsigned int irq, struct irq_desc *desc)
-{
- if (desc->irq_data.chip && desc->irq_data.chip->end)
- desc->irq_data.chip->end(irq);
-}
-#else
-static inline void irq_end(unsigned int irq, struct irq_desc *desc) { }
-#endif
-
/* Inline functions for support of irq chips on slow busses */
static inline void chip_bus_lock(struct irq_desc *desc)
{
@@ -70,43 +128,60 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data);
}
+struct irq_desc *
+__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus);
+void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus);
+
+static inline struct irq_desc *
+irq_get_desc_buslock(unsigned int irq, unsigned long *flags)
+{
+ return __irq_get_desc_lock(irq, flags, true);
+}
+
+static inline void
+irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags)
+{
+ __irq_put_desc_unlock(desc, flags, true);
+}
+
+static inline struct irq_desc *
+irq_get_desc_lock(unsigned int irq, unsigned long *flags)
+{
+ return __irq_get_desc_lock(irq, flags, false);
+}
+
+static inline void
+irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
+{
+ __irq_put_desc_unlock(desc, flags, false);
+}
+
/*
- * Debugging printout:
+ * Manipulation functions for irq_data.state
*/
+static inline void irqd_set_move_pending(struct irq_data *d)
+{
+ d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
+ irq_compat_set_move_pending(irq_data_to_desc(d));
+}
-#include <linux/kallsyms.h>
-
-#define P(f) if (desc->status & f) printk("%14s set\n", #f)
+static inline void irqd_clr_move_pending(struct irq_data *d)
+{
+ d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
+ irq_compat_clr_move_pending(irq_data_to_desc(d));
+}
-static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
+static inline void irqd_clear(struct irq_data *d, unsigned int mask)
{
- printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
- irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
- printk("->handle_irq(): %p, ", desc->handle_irq);
- print_symbol("%s\n", (unsigned long)desc->handle_irq);
- printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
- print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
- printk("->action(): %p\n", desc->action);
- if (desc->action) {
- printk("->action->handler(): %p, ", desc->action->handler);
- print_symbol("%s\n", (unsigned long)desc->action->handler);
- }
-
- P(IRQ_INPROGRESS);
- P(IRQ_DISABLED);
- P(IRQ_PENDING);
- P(IRQ_REPLAY);
- P(IRQ_AUTODETECT);
- P(IRQ_WAITING);
- P(IRQ_LEVEL);
- P(IRQ_MASKED);
-#ifdef CONFIG_IRQ_PER_CPU
- P(IRQ_PER_CPU);
-#endif
- P(IRQ_NOPROBE);
- P(IRQ_NOREQUEST);
- P(IRQ_NOAUTOEN);
+ d->state_use_accessors &= ~mask;
}
-#undef P
+static inline void irqd_set(struct irq_data *d, unsigned int mask)
+{
+ d->state_use_accessors |= mask;
+}
+static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
+{
+ return d->state_use_accessors & mask;
+}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2039bea31bd..dbccc799407 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -79,7 +79,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_data.chip_data = NULL;
desc->irq_data.handler_data = NULL;
desc->irq_data.msi_desc = NULL;
- desc->status = IRQ_DEFAULT_INIT_FLAGS;
+ irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
+ desc->istate = IRQS_DISABLED;
desc->handle_irq = handle_bad_irq;
desc->depth = 1;
desc->irq_count = 0;
@@ -206,6 +207,14 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
return NULL;
}
+static int irq_expand_nr_irqs(unsigned int nr)
+{
+ if (nr > IRQ_BITMAP_BITS)
+ return -ENOMEM;
+ nr_irqs = nr;
+ return 0;
+}
+
int __init early_irq_init(void)
{
int i, initcnt, node = first_online_node;
@@ -238,7 +247,7 @@ int __init early_irq_init(void)
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
- .status = IRQ_DEFAULT_INIT_FLAGS,
+ .istate = IRQS_DISABLED,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -260,8 +269,8 @@ int __init early_irq_init(void)
for (i = 0; i < count; i++) {
desc[i].irq_data.irq = i;
desc[i].irq_data.chip = &no_irq_chip;
- /* TODO : do this allocation on-demand ... */
desc[i].kstat_irqs = alloc_percpu(unsigned int);
+ irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
alloc_masks(desc + i, GFP_KERNEL, node);
desc_smp_init(desc + i, node);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -286,24 +295,14 @@ static void free_desc(unsigned int irq)
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
-#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
- struct irq_desc *desc;
- unsigned int i;
-
- for (i = 0; i < cnt; i++) {
- desc = irq_to_desc(start + i);
- if (desc && !desc->kstat_irqs) {
- unsigned int __percpu *stats = alloc_percpu(unsigned int);
-
- if (!stats)
- return -1;
- if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
- free_percpu(stats);
- }
- }
-#endif
return start;
}
+
+static int irq_expand_nr_irqs(unsigned int nr)
+{
+ return -ENOMEM;
+}
+
#endif /* !CONFIG_SPARSE_IRQ */
/* Dynamic interrupt handling */
@@ -347,14 +346,17 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
mutex_lock(&sparse_irq_lock);
- start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
+ start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
+ from, cnt, 0);
ret = -EEXIST;
if (irq >=0 && start != irq)
goto err;
- ret = -ENOMEM;
- if (start >= nr_irqs)
- goto err;
+ if (start + cnt > nr_irqs) {
+ ret = irq_expand_nr_irqs(start + cnt);
+ if (ret)
+ goto err;
+ }
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
@@ -401,6 +403,26 @@ unsigned int irq_get_next_irq(unsigned int offset)
return find_next_bit(allocated_irqs, nr_irqs, offset);
}
+struct irq_desc *
+__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (desc) {
+ if (bus)
+ chip_bus_lock(desc);
+ raw_spin_lock_irqsave(&desc->lock, *flags);
+ }
+ return desc;
+}
+
+void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus)
+{
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (bus)
+ chip_bus_sync_unlock(desc);
+}
+
/**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq
* @irq: irq number to initialize
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9033c1c7082..acd599a43bf 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,17 @@
#include "internals.h"
+#ifdef CONFIG_IRQ_FORCED_THREADING
+__read_mostly bool force_irqthreads;
+
+static int __init setup_forced_irqthreads(char *arg)
+{
+ force_irqthreads = true;
+ return 0;
+}
+early_param("threadirqs", setup_forced_irqthreads);
+#endif
+
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -30,7 +41,7 @@
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned int status;
+ unsigned int state;
if (!desc)
return;
@@ -42,16 +53,16 @@ void synchronize_irq(unsigned int irq)
* Wait until we're out of the critical section. This might
* give the wrong answer due to the lack of memory barriers.
*/
- while (desc->status & IRQ_INPROGRESS)
+ while (desc->istate & IRQS_INPROGRESS)
cpu_relax();
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
- status = desc->status;
+ state = desc->istate;
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
- } while (status & IRQ_INPROGRESS);
+ } while (state & IRQS_INPROGRESS);
/*
* We made sure that no hardirq handler is running. Now verify
@@ -73,8 +84,8 @@ int irq_can_set_affinity(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip ||
- !desc->irq_data.chip->irq_set_affinity)
+ if (!desc || !irqd_can_balance(&desc->irq_data) ||
+ !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
return 0;
return 1;
@@ -100,67 +111,169 @@ void irq_set_thread_affinity(struct irq_desc *desc)
}
}
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
+{
+ return irq_settings_can_move_pcntxt(desc);
+}
+static inline bool irq_move_pending(struct irq_desc *desc)
+{
+ return irqd_is_setaffinity_pending(&desc->irq_data);
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+ cpumask_copy(desc->pending_mask, mask);
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+ cpumask_copy(mask, desc->pending_mask);
+}
+#else
+static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
+static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
+#endif
+
/**
* irq_set_affinity - Set the irq affinity of a given irq
* @irq: Interrupt to set affinity
* @cpumask: cpumask
*
*/
-int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_chip *chip = desc->irq_data.chip;
unsigned long flags;
+ int ret = 0;
if (!chip->irq_set_affinity)
return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PCNTXT) {
- if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
- cpumask_copy(desc->irq_data.affinity, cpumask);
+ if (irq_can_move_pcntxt(desc)) {
+ ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ cpumask_copy(desc->irq_data.affinity, mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
irq_set_thread_affinity(desc);
+ ret = 0;
}
+ } else {
+ irqd_set_move_pending(&desc->irq_data);
+ irq_copy_pending(desc, mask);
}
- else {
- desc->status |= IRQ_MOVE_PENDING;
- cpumask_copy(desc->pending_mask, cpumask);
- }
-#else
- if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
- cpumask_copy(desc->irq_data.affinity, cpumask);
- irq_set_thread_affinity(desc);
+
+ if (desc->affinity_notify) {
+ kref_get(&desc->affinity_notify->kref);
+ schedule_work(&desc->affinity_notify->work);
}
-#endif
- desc->status |= IRQ_AFFINITY_SET;
+ irq_compat_set_affinity(desc);
+ irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
raw_spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
+ return ret;
}
int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
{
+ unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
+
+ if (!desc)
+ return -EINVAL;
+ desc->affinity_hint = m;
+ irq_put_desc_unlock(desc, flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
+static void irq_affinity_notify(struct work_struct *work)
+{
+ struct irq_affinity_notify *notify =
+ container_of(work, struct irq_affinity_notify, work);
+ struct irq_desc *desc = irq_to_desc(notify->irq);
+ cpumask_var_t cpumask;
+ unsigned long flags;
+
+ if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ goto out;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ if (irq_move_pending(desc))
+ irq_get_pending(cpumask, desc);
+ else
+ cpumask_copy(cpumask, desc->irq_data.affinity);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ notify->notify(notify, cpumask);
+
+ free_cpumask_var(cpumask);
+out:
+ kref_put(&notify->kref, notify->release);
+}
+
+/**
+ * irq_set_affinity_notifier - control notification of IRQ affinity changes
+ * @irq: Interrupt for which to enable/disable notification
+ * @notify: Context for notification, or %NULL to disable
+ * notification. Function pointers must be initialised;
+ * the other fields will be initialised by this function.
+ *
+ * Must be called in process context. Notification may only be enabled
+ * after the IRQ is allocated and must be disabled before the IRQ is
+ * freed using free_irq().
+ */
+int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_affinity_notify *old_notify;
unsigned long flags;
+ /* The release function is promised process context */
+ might_sleep();
+
if (!desc)
return -EINVAL;
+ /* Complete initialisation of *notify */
+ if (notify) {
+ notify->irq = irq;
+ kref_init(&notify->kref);
+ INIT_WORK(&notify->work, irq_affinity_notify);
+ }
+
raw_spin_lock_irqsave(&desc->lock, flags);
- desc->affinity_hint = m;
+ old_notify = desc->affinity_notify;
+ desc->affinity_notify = notify;
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (old_notify)
+ kref_put(&old_notify->kref, old_notify->release);
+
return 0;
}
-EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
#ifndef CONFIG_AUTO_IRQ_AFFINITY
/*
* Generic version of the affinity autoselector.
*/
-static int setup_affinity(unsigned int irq, struct irq_desc *desc)
+static int
+setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct cpumask *set = irq_default_affinity;
+ int ret;
+
+ /* Excludes PER_CPU and NO_BALANCE interrupts */
if (!irq_can_set_affinity(irq))
return 0;
@@ -168,22 +281,29 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc)
* Preserve an userspace affinity setup, but make sure that
* one of the targets is online.
*/
- if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
- if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask)
- < nr_cpu_ids)
- goto set_affinity;
- else
- desc->status &= ~IRQ_AFFINITY_SET;
+ if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
+ if (cpumask_intersects(desc->irq_data.affinity,
+ cpu_online_mask))
+ set = desc->irq_data.affinity;
+ else {
+ irq_compat_clr_affinity(desc);
+ irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
+ }
}
- cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity);
-set_affinity:
- desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false);
-
+ cpumask_and(mask, cpu_online_mask, set);
+ ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ cpumask_copy(desc->irq_data.affinity, mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
+ irq_set_thread_affinity(desc);
+ }
return 0;
}
#else
-static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
+static inline int
+setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask)
{
return irq_select_affinity(irq);
}
@@ -192,23 +312,21 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
/*
* Called when affinity is set via /proc/irq
*/
-int irq_select_affinity_usr(unsigned int irq)
+int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&desc->lock, flags);
- ret = setup_affinity(irq, desc);
- if (!ret)
- irq_set_thread_affinity(desc);
+ ret = setup_affinity(irq, desc, mask);
raw_spin_unlock_irqrestore(&desc->lock, flags);
-
return ret;
}
#else
-static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
+static inline int
+setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
return 0;
}
@@ -219,13 +337,23 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
if (suspend) {
if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
return;
- desc->status |= IRQ_SUSPENDED;
+ desc->istate |= IRQS_SUSPENDED;
}
- if (!desc->depth++) {
- desc->status |= IRQ_DISABLED;
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- }
+ if (!desc->depth++)
+ irq_disable(desc);
+}
+
+static int __disable_irq_nosync(unsigned int irq)
+{
+ unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
+
+ if (!desc)
+ return -EINVAL;
+ __disable_irq(desc, irq, false);
+ irq_put_desc_busunlock(desc, flags);
+ return 0;
}
/**
@@ -241,17 +369,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
*/
void disable_irq_nosync(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
-
- if (!desc)
- return;
-
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
- __disable_irq(desc, irq, false);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
+ __disable_irq_nosync(irq);
}
EXPORT_SYMBOL(disable_irq_nosync);
@@ -269,21 +387,24 @@ EXPORT_SYMBOL(disable_irq_nosync);
*/
void disable_irq(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (!desc)
- return;
-
- disable_irq_nosync(irq);
- if (desc->action)
+ if (!__disable_irq_nosync(irq))
synchronize_irq(irq);
}
EXPORT_SYMBOL(disable_irq);
void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
{
- if (resume)
- desc->status &= ~IRQ_SUSPENDED;
+ if (resume) {
+ if (!(desc->istate & IRQS_SUSPENDED)) {
+ if (!desc->action)
+ return;
+ if (!(desc->action->flags & IRQF_FORCE_RESUME))
+ return;
+ /* Pretend that it got disabled ! */
+ desc->depth++;
+ }
+ desc->istate &= ~IRQS_SUSPENDED;
+ }
switch (desc->depth) {
case 0:
@@ -291,12 +412,11 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
break;
case 1: {
- unsigned int status = desc->status & ~IRQ_DISABLED;
-
- if (desc->status & IRQ_SUSPENDED)
+ if (desc->istate & IRQS_SUSPENDED)
goto err_out;
/* Prevent probing on this irq: */
- desc->status = status | IRQ_NOPROBE;
+ irq_settings_set_noprobe(desc);
+ irq_enable(desc);
check_irq_resend(desc, irq);
/* fall-through */
}
@@ -318,21 +438,18 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
*/
void enable_irq(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
if (!desc)
return;
+ if (WARN(!desc->irq_data.chip,
+ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
+ goto out;
- if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
- KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
- return;
-
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, false);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
+out:
+ irq_put_desc_busunlock(desc, flags);
}
EXPORT_SYMBOL(enable_irq);
@@ -348,7 +465,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
}
/**
- * set_irq_wake - control irq power management wakeup
+ * irq_set_irq_wake - control irq power management wakeup
* @irq: interrupt to control
* @on: enable/disable power management wakeup
*
@@ -359,23 +476,22 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
* Wakeup mode lets this IRQ wake the system from sleep
* states like "suspend to RAM".
*/
-int set_irq_wake(unsigned int irq, unsigned int on)
+int irq_set_irq_wake(unsigned int irq, unsigned int on)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
int ret = 0;
/* wakeup-capable irqs can be shared between drivers that
* don't need to have the same sleep mode behaviors.
*/
- raw_spin_lock_irqsave(&desc->lock, flags);
if (on) {
if (desc->wake_depth++ == 0) {
ret = set_irq_wake_real(irq, on);
if (ret)
desc->wake_depth = 0;
else
- desc->status |= IRQ_WAKEUP;
+ irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
}
} else {
if (desc->wake_depth == 0) {
@@ -385,14 +501,13 @@ int set_irq_wake(unsigned int irq, unsigned int on)
if (ret)
desc->wake_depth = 1;
else
- desc->status &= ~IRQ_WAKEUP;
+ irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
}
}
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_busunlock(desc, flags);
return ret;
}
-EXPORT_SYMBOL(set_irq_wake);
+EXPORT_SYMBOL(irq_set_irq_wake);
/*
* Internal function that tells the architecture code whether a
@@ -401,43 +516,27 @@ EXPORT_SYMBOL(set_irq_wake);
*/
int can_request_irq(unsigned int irq, unsigned long irqflags)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irqaction *action;
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
+ int canrequest = 0;
if (!desc)
return 0;
- if (desc->status & IRQ_NOREQUEST)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- action = desc->action;
- if (action)
- if (irqflags & action->flags & IRQF_SHARED)
- action = NULL;
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
- return !action;
-}
-
-void compat_irq_chip_set_default_handler(struct irq_desc *desc)
-{
- /*
- * If the architecture still has not overriden
- * the flow handler then zap the default. This
- * should catch incorrect flow-type setting.
- */
- if (desc->handle_irq == &handle_bad_irq)
- desc->handle_irq = NULL;
+ if (irq_settings_can_request(desc)) {
+ if (desc->action)
+ if (irqflags & desc->action->flags & IRQF_SHARED)
+ canrequest =1;
+ }
+ irq_put_desc_unlock(desc, flags);
+ return canrequest;
}
int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags)
{
- int ret;
struct irq_chip *chip = desc->irq_data.chip;
+ int ret, unmask = 0;
if (!chip || !chip->irq_set_type) {
/*
@@ -449,23 +548,43 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
return 0;
}
+ flags &= IRQ_TYPE_SENSE_MASK;
+
+ if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
+ if (!(desc->istate & IRQS_MASKED))
+ mask_irq(desc);
+ if (!(desc->istate & IRQS_DISABLED))
+ unmask = 1;
+ }
+
/* caller masked out all except trigger mode flags */
ret = chip->irq_set_type(&desc->irq_data, flags);
- if (ret)
- pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
- flags, irq, chip->irq_set_type);
- else {
- if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
- flags |= IRQ_LEVEL;
- /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
- desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
- desc->status |= flags;
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK);
+ irqd_set(&desc->irq_data, flags);
+
+ case IRQ_SET_MASK_OK_NOCOPY:
+ flags = irqd_get_trigger_type(&desc->irq_data);
+ irq_settings_set_trigger_mask(desc, flags);
+ irqd_clear(&desc->irq_data, IRQD_LEVEL);
+ irq_settings_clr_level(desc);
+ if (flags & IRQ_TYPE_LEVEL_MASK) {
+ irq_settings_set_level(desc);
+ irqd_set(&desc->irq_data, IRQD_LEVEL);
+ }
if (chip != desc->irq_data.chip)
irq_chip_set_defaults(desc->irq_data.chip);
+ ret = 0;
+ break;
+ default:
+ pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
+ flags, irq, chip->irq_set_type);
}
-
+ if (unmask)
+ unmask_irq(desc);
return ret;
}
@@ -509,8 +628,11 @@ static int irq_wait_for_interrupt(struct irqaction *action)
* handler finished. unmask if the interrupt has not been disabled and
* is marked MASKED.
*/
-static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
+static void irq_finalize_oneshot(struct irq_desc *desc,
+ struct irqaction *action, bool force)
{
+ if (!(desc->istate & IRQS_ONESHOT))
+ return;
again:
chip_bus_lock(desc);
raw_spin_lock_irq(&desc->lock);
@@ -522,26 +644,44 @@ again:
* The thread is faster done than the hard interrupt handler
* on the other CPU. If we unmask the irq line then the
* interrupt can come in again and masks the line, leaves due
- * to IRQ_INPROGRESS and the irq line is masked forever.
+ * to IRQS_INPROGRESS and the irq line is masked forever.
+ *
+ * This also serializes the state of shared oneshot handlers
+ * versus "desc->threads_onehsot |= action->thread_mask;" in
+ * irq_wake_thread(). See the comment there which explains the
+ * serialization.
*/
- if (unlikely(desc->status & IRQ_INPROGRESS)) {
+ if (unlikely(desc->istate & IRQS_INPROGRESS)) {
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
cpu_relax();
goto again;
}
- if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
- desc->status &= ~IRQ_MASKED;
+ /*
+ * Now check again, whether the thread should run. Otherwise
+ * we would clear the threads_oneshot bit of this thread which
+ * was just set.
+ */
+ if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ goto out_unlock;
+
+ desc->threads_oneshot &= ~action->thread_mask;
+
+ if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) &&
+ (desc->istate & IRQS_MASKED)) {
+ irq_compat_clr_masked(desc);
+ desc->istate &= ~IRQS_MASKED;
desc->irq_data.chip->irq_unmask(&desc->irq_data);
}
+out_unlock:
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
}
#ifdef CONFIG_SMP
/*
- * Check whether we need to change the affinity of the interrupt thread.
+ * Check whether we need to chasnge the affinity of the interrupt thread.
*/
static void
irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
@@ -573,6 +713,32 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
#endif
/*
+ * Interrupts which are not explicitely requested as threaded
+ * interrupts rely on the implicit bh/preempt disable of the hard irq
+ * context. So we need to disable bh here to avoid deadlocks and other
+ * side effects.
+ */
+static void
+irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
+{
+ local_bh_disable();
+ action->thread_fn(action->irq, action->dev_id);
+ irq_finalize_oneshot(desc, action, false);
+ local_bh_enable();
+}
+
+/*
+ * Interrupts explicitely requested as threaded interupts want to be
+ * preemtible - many of them need to sleep and wait for slow busses to
+ * complete.
+ */
+static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action)
+{
+ action->thread_fn(action->irq, action->dev_id);
+ irq_finalize_oneshot(desc, action, false);
+}
+
+/*
* Interrupt handler thread
*/
static int irq_thread(void *data)
@@ -582,7 +748,14 @@ static int irq_thread(void *data)
};
struct irqaction *action = data;
struct irq_desc *desc = irq_to_desc(action->irq);
- int wake, oneshot = desc->status & IRQ_ONESHOT;
+ void (*handler_fn)(struct irq_desc *desc, struct irqaction *action);
+ int wake;
+
+ if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
+ handler_fn = irq_forced_thread_fn;
+ else
+ handler_fn = irq_thread_fn;
sched_setscheduler(current, SCHED_FIFO, &param);
current->irqaction = action;
@@ -594,23 +767,20 @@ static int irq_thread(void *data)
atomic_inc(&desc->threads_active);
raw_spin_lock_irq(&desc->lock);
- if (unlikely(desc->status & IRQ_DISABLED)) {
+ if (unlikely(desc->istate & IRQS_DISABLED)) {
/*
* CHECKME: We might need a dedicated
* IRQ_THREAD_PENDING flag here, which
* retriggers the thread in check_irq_resend()
- * but AFAICT IRQ_PENDING should be fine as it
+ * but AFAICT IRQS_PENDING should be fine as it
* retriggers the interrupt itself --- tglx
*/
- desc->status |= IRQ_PENDING;
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
raw_spin_unlock_irq(&desc->lock);
} else {
raw_spin_unlock_irq(&desc->lock);
-
- action->thread_fn(action->irq, action->dev_id);
-
- if (oneshot)
- irq_finalize_oneshot(action->irq, desc);
+ handler_fn(desc, action);
}
wake = atomic_dec_and_test(&desc->threads_active);
@@ -619,6 +789,9 @@ static int irq_thread(void *data)
wake_up(&desc->wait_for_threads);
}
+ /* Prevent a stale desc->threads_oneshot */
+ irq_finalize_oneshot(desc, action, true);
+
/*
* Clear irqaction. Otherwise exit_irq_thread() would make
* fuzz about an active irq thread going into nirvana.
@@ -633,6 +806,7 @@ static int irq_thread(void *data)
void exit_irq_thread(void)
{
struct task_struct *tsk = current;
+ struct irq_desc *desc;
if (!tsk->irqaction)
return;
@@ -641,6 +815,14 @@ void exit_irq_thread(void)
"exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+ desc = irq_to_desc(tsk->irqaction->irq);
+
+ /*
+ * Prevent a stale desc->threads_oneshot. Must be called
+ * before setting the IRQTF_DIED flag.
+ */
+ irq_finalize_oneshot(desc, tsk->irqaction, true);
+
/*
* Set the THREAD DIED flag to prevent further wakeups of the
* soon to be gone threaded handler.
@@ -648,6 +830,22 @@ void exit_irq_thread(void)
set_bit(IRQTF_DIED, &tsk->irqaction->flags);
}
+static void irq_setup_forced_threading(struct irqaction *new)
+{
+ if (!force_irqthreads)
+ return;
+ if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
+ return;
+
+ new->flags |= IRQF_ONESHOT;
+
+ if (!new->thread_fn) {
+ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
+ new->thread_fn = new->handler;
+ new->handler = irq_default_primary_handler;
+ }
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -657,9 +855,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
{
struct irqaction *old, **old_ptr;
const char *old_name = NULL;
- unsigned long flags;
- int nested, shared = 0;
- int ret;
+ unsigned long flags, thread_mask = 0;
+ int ret, nested, shared = 0;
+ cpumask_var_t mask;
if (!desc)
return -EINVAL;
@@ -683,15 +881,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
rand_initialize_irq(irq);
}
- /* Oneshot interrupts are not allowed with shared */
- if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
- return -EINVAL;
-
/*
* Check whether the interrupt nests into another interrupt
* thread.
*/
- nested = desc->status & IRQ_NESTED_THREAD;
+ nested = irq_settings_is_nested_thread(desc);
if (nested) {
if (!new->thread_fn)
return -EINVAL;
@@ -701,6 +895,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* dummy function which warns when called.
*/
new->handler = irq_nested_primary_handler;
+ } else {
+ irq_setup_forced_threading(new);
}
/*
@@ -724,6 +920,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->thread = t;
}
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out_thread;
+ }
+
/*
* The following block of code has to be executed atomically
*/
@@ -735,29 +936,40 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* Can't share interrupts unless both agree to and are
* the same type (level, edge, polarity). So both flag
* fields must have IRQF_SHARED set and the bits which
- * set the trigger type must match.
+ * set the trigger type must match. Also all must
+ * agree on ONESHOT.
*/
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) {
+ ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
+ ((old->flags ^ new->flags) & IRQF_ONESHOT)) {
old_name = old->name;
goto mismatch;
}
-#if defined(CONFIG_IRQ_PER_CPU)
/* All handlers must agree on per-cpuness */
if ((old->flags & IRQF_PERCPU) !=
(new->flags & IRQF_PERCPU))
goto mismatch;
-#endif
/* add new interrupt at end of irq queue */
do {
+ thread_mask |= old->thread_mask;
old_ptr = &old->next;
old = *old_ptr;
} while (old);
shared = 1;
}
+ /*
+ * Setup the thread mask for this irqaction. Unlikely to have
+ * 32 resp 64 irqs sharing one line, but who knows.
+ */
+ if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
+ ret = -EBUSY;
+ goto out_mask;
+ }
+ new->thread_mask = 1 << ffz(thread_mask);
+
if (!shared) {
irq_chip_set_defaults(desc->irq_data.chip);
@@ -769,42 +981,44 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->flags & IRQF_TRIGGER_MASK);
if (ret)
- goto out_thread;
- } else
- compat_irq_chip_set_default_handler(desc);
-#if defined(CONFIG_IRQ_PER_CPU)
- if (new->flags & IRQF_PERCPU)
- desc->status |= IRQ_PER_CPU;
-#endif
+ goto out_mask;
+ }
- desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT |
- IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
+ desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
+ IRQS_INPROGRESS | IRQS_ONESHOT | \
+ IRQS_WAITING);
+
+ if (new->flags & IRQF_PERCPU) {
+ irqd_set(&desc->irq_data, IRQD_PER_CPU);
+ irq_settings_set_per_cpu(desc);
+ }
if (new->flags & IRQF_ONESHOT)
- desc->status |= IRQ_ONESHOT;
+ desc->istate |= IRQS_ONESHOT;
- if (!(desc->status & IRQ_NOAUTOEN)) {
- desc->depth = 0;
- desc->status &= ~IRQ_DISABLED;
- desc->irq_data.chip->irq_startup(&desc->irq_data);
- } else
+ if (irq_settings_can_autoenable(desc))
+ irq_startup(desc);
+ else
/* Undo nested disables: */
desc->depth = 1;
/* Exclude IRQ from balancing if requested */
- if (new->flags & IRQF_NOBALANCING)
- desc->status |= IRQ_NO_BALANCING;
+ if (new->flags & IRQF_NOBALANCING) {
+ irq_settings_set_no_balancing(desc);
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ }
/* Set default affinity mask once everything is setup */
- setup_affinity(irq, desc);
-
- } else if ((new->flags & IRQF_TRIGGER_MASK)
- && (new->flags & IRQF_TRIGGER_MASK)
- != (desc->status & IRQ_TYPE_SENSE_MASK)) {
- /* hope the handler works with the actual trigger mode... */
- pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
- irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
- (int)(new->flags & IRQF_TRIGGER_MASK));
+ setup_affinity(irq, desc, mask);
+
+ } else if (new->flags & IRQF_TRIGGER_MASK) {
+ unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
+ unsigned int omsk = irq_settings_get_trigger_mask(desc);
+
+ if (nmsk != omsk)
+ /* hope the handler works with current trigger mode */
+ pr_warning("IRQ %d uses trigger mode %u; requested %u\n",
+ irq, nmsk, omsk);
}
new->irq = irq;
@@ -818,8 +1032,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* Check whether we disabled the irq via the spurious handler
* before. Reenable it and give it another chance.
*/
- if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) {
- desc->status &= ~IRQ_SPURIOUS_DISABLED;
+ if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
+ desc->istate &= ~IRQS_SPURIOUS_DISABLED;
__enable_irq(desc, irq, false);
}
@@ -849,6 +1063,9 @@ mismatch:
#endif
ret = -EBUSY;
+out_mask:
+ free_cpumask_var(mask);
+
out_thread:
raw_spin_unlock_irqrestore(&desc->lock, flags);
if (new->thread) {
@@ -871,9 +1088,14 @@ out_thread:
*/
int setup_irq(unsigned int irq, struct irqaction *act)
{
+ int retval;
struct irq_desc *desc = irq_to_desc(irq);
- return __setup_irq(irq, desc, act);
+ chip_bus_lock(desc);
+ retval = __setup_irq(irq, desc, act);
+ chip_bus_sync_unlock(desc);
+
+ return retval;
}
EXPORT_SYMBOL_GPL(setup_irq);
@@ -924,13 +1146,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
/* If this was the last handler, shut down the IRQ line: */
- if (!desc->action) {
- desc->status |= IRQ_DISABLED;
- if (desc->irq_data.chip->irq_shutdown)
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- else
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- }
+ if (!desc->action)
+ irq_shutdown(desc);
#ifdef CONFIG_SMP
/* make sure affinity_hint is cleaned up */
@@ -1004,6 +1221,11 @@ void free_irq(unsigned int irq, void *dev_id)
if (!desc)
return;
+#ifdef CONFIG_SMP
+ if (WARN_ON(desc->affinity_notify))
+ desc->affinity_notify = NULL;
+#endif
+
chip_bus_lock(desc);
kfree(__free_irq(irq, dev_id));
chip_bus_sync_unlock(desc);
@@ -1074,7 +1296,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
if (!desc)
return -EINVAL;
- if (desc->status & IRQ_NOREQUEST)
+ if (!irq_settings_can_request(desc))
return -EINVAL;
if (!handler) {
@@ -1149,7 +1371,7 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
if (!desc)
return -EINVAL;
- if (desc->status & IRQ_NESTED_THREAD) {
+ if (irq_settings_is_nested_thread(desc)) {
ret = request_threaded_irq(irq, NULL, handler,
flags, name, dev_id);
return !ret ? IRQC_IS_NESTED : ret;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 441fd629ff0..ec4806d4778 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,23 +4,23 @@
#include "internals.h"
-void move_masked_irq(int irq)
+void irq_move_masked_irq(struct irq_data *idata)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_chip *chip = desc->irq_data.chip;
+ struct irq_desc *desc = irq_data_to_desc(idata);
+ struct irq_chip *chip = idata->chip;
- if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+ if (likely(!irqd_is_setaffinity_pending(&desc->irq_data)))
return;
/*
* Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
*/
- if (CHECK_IRQ_PER_CPU(desc->status)) {
+ if (!irqd_can_balance(&desc->irq_data)) {
WARN_ON(1);
return;
}
- desc->status &= ~IRQ_MOVE_PENDING;
+ irqd_clr_move_pending(&desc->irq_data);
if (unlikely(cpumask_empty(desc->pending_mask)))
return;
@@ -53,15 +53,20 @@ void move_masked_irq(int irq)
cpumask_clear(desc->pending_mask);
}
-void move_native_irq(int irq)
+void move_masked_irq(int irq)
+{
+ irq_move_masked_irq(irq_get_irq_data(irq));
+}
+
+void irq_move_irq(struct irq_data *idata)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_desc *desc = irq_data_to_desc(idata);
bool masked;
- if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+ if (likely(!irqd_is_setaffinity_pending(idata)))
return;
- if (unlikely(desc->status & IRQ_DISABLED))
+ if (unlikely(desc->istate & IRQS_DISABLED))
return;
/*
@@ -69,10 +74,15 @@ void move_native_irq(int irq)
* threaded interrupt with ONESHOT set, we can end up with an
* interrupt storm.
*/
- masked = desc->status & IRQ_MASKED;
+ masked = desc->istate & IRQS_MASKED;
if (!masked)
- desc->irq_data.chip->irq_mask(&desc->irq_data);
- move_masked_irq(irq);
+ idata->chip->irq_mask(idata);
+ irq_move_masked_irq(idata);
if (!masked)
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ idata->chip->irq_unmask(idata);
+}
+
+void move_native_irq(int irq)
+{
+ irq_move_irq(irq_get_irq_data(irq));
}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b0..f76fc00c987 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -18,7 +18,7 @@
* During system-wide suspend or hibernation device drivers need to be prevented
* from receiving interrupts and this function is provided for this purpose.
* It marks all interrupt lines in use, except for the timer ones, as disabled
- * and sets the IRQ_SUSPENDED flag for each of them.
+ * and sets the IRQS_SUSPENDED flag for each of them.
*/
void suspend_device_irqs(void)
{
@@ -34,7 +34,7 @@ void suspend_device_irqs(void)
}
for_each_irq_desc(irq, desc)
- if (desc->status & IRQ_SUSPENDED)
+ if (desc->istate & IRQS_SUSPENDED)
synchronize_irq(irq);
}
EXPORT_SYMBOL_GPL(suspend_device_irqs);
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
* resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
*
* Enable all interrupt lines previously disabled by suspend_device_irqs() that
- * have the IRQ_SUSPENDED flag set.
+ * have the IRQS_SUSPENDED flag set.
*/
void resume_device_irqs(void)
{
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
for_each_irq_desc(irq, desc) {
unsigned long flags;
- if (!(desc->status & IRQ_SUSPENDED))
- continue;
-
raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, true);
raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -71,9 +68,24 @@ int check_wakeup_irqs(void)
struct irq_desc *desc;
int irq;
- for_each_irq_desc(irq, desc)
- if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING))
- return -EBUSY;
+ for_each_irq_desc(irq, desc) {
+ if (irqd_is_wakeup_set(&desc->irq_data)) {
+ if (desc->istate & IRQS_PENDING)
+ return -EBUSY;
+ continue;
+ }
+ /*
+ * Check the non wakeup interrupts whether they need
+ * to be masked before finally going into suspend
+ * state. That's for hardware which has no wakeup
+ * source configuration facility. The chip
+ * implementation indicates that with
+ * IRQCHIP_MASK_ON_SUSPEND.
+ */
+ if (desc->istate & IRQS_SUSPENDED &&
+ irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND)
+ mask_irq(desc);
+ }
return 0;
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c8a2a9f8a7..4cc2e5ed0be 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -11,6 +11,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
#include "internals.h"
@@ -24,7 +25,7 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
const struct cpumask *mask = desc->irq_data.affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PENDING)
+ if (irqd_is_setaffinity_pending(&desc->irq_data))
mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
@@ -65,8 +66,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
cpumask_var_t new_value;
int err;
- if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity ||
- irq_balancing_disabled(irq))
+ if (!irq_can_set_affinity(irq) || no_irq_affinity)
return -EIO;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
@@ -89,7 +89,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+ err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
} else {
irq_set_affinity(irq, new_value);
err = count;
@@ -357,3 +357,65 @@ void init_irq_proc(void)
}
}
+#ifdef CONFIG_GENERIC_IRQ_SHOW
+
+int __weak arch_show_interrupts(struct seq_file *p, int prec)
+{
+ return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ static int prec;
+
+ unsigned long flags, any_count = 0;
+ int i = *(loff_t *) v, j;
+ struct irqaction *action;
+ struct irq_desc *desc;
+
+ if (i > nr_irqs)
+ return 0;
+
+ if (i == nr_irqs)
+ return arch_show_interrupts(p, prec);
+
+ /* print header and calculate the width of the first column */
+ if (i == 0) {
+ for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
+ j *= 10;
+
+ seq_printf(p, "%*s", prec + 8, "");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%-8d", j);
+ seq_putc(p, '\n');
+ }
+
+ desc = irq_to_desc(i);
+ if (!desc)
+ return 0;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ for_each_online_cpu(j)
+ any_count |= kstat_irqs_cpu(i, j);
+ action = desc->action;
+ if (!action && !any_count)
+ goto out;
+
+ seq_printf(p, "%*d: ", prec, i);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
+ seq_printf(p, "-%-8s", desc->name);
+
+ if (action) {
+ seq_printf(p, " %s", action->name);
+ while ((action = action->next) != NULL)
+ seq_printf(p, ", %s", action->name);
+ }
+
+ seq_putc(p, '\n');
+out:
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
+#endif
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dc49358b73f..ad683a99b1e 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -55,20 +55,19 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq)
{
- unsigned int status = desc->status;
-
- /*
- * Make sure the interrupt is enabled, before resending it:
- */
- desc->irq_data.chip->irq_enable(&desc->irq_data);
-
/*
* We do not resend level type interrupts. Level type
* interrupts are resent by hardware when they are still
* active.
*/
- if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
+ if (irq_settings_is_level(desc))
+ return;
+ if (desc->istate & IRQS_REPLAY)
+ return;
+ if (desc->istate & IRQS_PENDING) {
+ irq_compat_clr_pending(desc);
+ desc->istate &= ~IRQS_PENDING;
+ desc->istate |= IRQS_REPLAY;
if (!desc->irq_data.chip->irq_retrigger ||
!desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
new file mode 100644
index 00000000000..0227ad35827
--- /dev/null
+++ b/kernel/irq/settings.h
@@ -0,0 +1,138 @@
+/*
+ * Internal header to deal with irq_desc->status which will be renamed
+ * to irq_desc->settings.
+ */
+enum {
+ _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS,
+ _IRQ_PER_CPU = IRQ_PER_CPU,
+ _IRQ_LEVEL = IRQ_LEVEL,
+ _IRQ_NOPROBE = IRQ_NOPROBE,
+ _IRQ_NOREQUEST = IRQ_NOREQUEST,
+ _IRQ_NOAUTOEN = IRQ_NOAUTOEN,
+ _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT,
+ _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
+ _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
+ _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
+};
+
+#define IRQ_INPROGRESS GOT_YOU_MORON
+#define IRQ_REPLAY GOT_YOU_MORON
+#define IRQ_WAITING GOT_YOU_MORON
+#define IRQ_DISABLED GOT_YOU_MORON
+#define IRQ_PENDING GOT_YOU_MORON
+#define IRQ_MASKED GOT_YOU_MORON
+#define IRQ_WAKEUP GOT_YOU_MORON
+#define IRQ_MOVE_PENDING GOT_YOU_MORON
+#define IRQ_PER_CPU GOT_YOU_MORON
+#define IRQ_NO_BALANCING GOT_YOU_MORON
+#define IRQ_AFFINITY_SET GOT_YOU_MORON
+#define IRQ_LEVEL GOT_YOU_MORON
+#define IRQ_NOPROBE GOT_YOU_MORON
+#define IRQ_NOREQUEST GOT_YOU_MORON
+#define IRQ_NOAUTOEN GOT_YOU_MORON
+#define IRQ_NESTED_THREAD GOT_YOU_MORON
+#undef IRQF_MODIFY_MASK
+#define IRQF_MODIFY_MASK GOT_YOU_MORON
+
+static inline void
+irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
+{
+ desc->status &= ~(clr & _IRQF_MODIFY_MASK);
+ desc->status |= (set & _IRQF_MODIFY_MASK);
+}
+
+static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_PER_CPU;
+}
+
+static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_PER_CPU;
+}
+
+static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NO_BALANCING;
+}
+
+static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_NO_BALANCING;
+}
+
+static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
+{
+ return desc->status & IRQ_TYPE_SENSE_MASK;
+}
+
+static inline void
+irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
+{
+ desc->status &= ~IRQ_TYPE_SENSE_MASK;
+ desc->status |= mask & IRQ_TYPE_SENSE_MASK;
+}
+
+static inline bool irq_settings_is_level(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_LEVEL;
+}
+
+static inline void irq_settings_clr_level(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_LEVEL;
+}
+
+static inline void irq_settings_set_level(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_LEVEL;
+}
+
+static inline bool irq_settings_can_request(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOREQUEST);
+}
+
+static inline void irq_settings_clr_norequest(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_NOREQUEST;
+}
+
+static inline void irq_settings_set_norequest(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NOREQUEST;
+}
+
+static inline bool irq_settings_can_probe(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOPROBE);
+}
+
+static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_NOPROBE;
+}
+
+static inline void irq_settings_set_noprobe(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NOPROBE;
+}
+
+static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_MOVE_PCNTXT;
+}
+
+static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOAUTOEN);
+}
+
+static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_NESTED_THREAD;
+}
+
+/* Nothing should touch desc->status from now on */
+#undef status
+#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3089d3b9d5f..dd586ebf9c8 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,70 +21,94 @@ static int irqfixup __read_mostly;
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(unsigned long dummy);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+static int irq_poll_cpu;
+static atomic_t irq_poll_active;
+
+/*
+ * We wait here for a poller to finish.
+ *
+ * If the poll runs on this CPU, then we yell loudly and return
+ * false. That will leave the interrupt line disabled in the worst
+ * case, but it should never happen.
+ *
+ * We wait until the poller is done and then recheck disabled and
+ * action (about to be disabled). Only if it's still active, we return
+ * true and let the handler run.
+ */
+bool irq_wait_for_poll(struct irq_desc *desc)
+{
+ if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
+ "irq poll in progress on cpu %d for irq %d\n",
+ smp_processor_id(), desc->irq_data.irq))
+ return false;
+
+#ifdef CONFIG_SMP
+ do {
+ raw_spin_unlock(&desc->lock);
+ while (desc->istate & IRQS_INPROGRESS)
+ cpu_relax();
+ raw_spin_lock(&desc->lock);
+ } while (desc->istate & IRQS_INPROGRESS);
+ /* Might have been disabled in meantime */
+ return !(desc->istate & IRQS_DISABLED) && desc->action;
+#else
+ return false;
+#endif
+}
+
/*
* Recovery handler for misrouted interrupts.
*/
-static int try_one_irq(int irq, struct irq_desc *desc)
+static int try_one_irq(int irq, struct irq_desc *desc, bool force)
{
+ irqreturn_t ret = IRQ_NONE;
struct irqaction *action;
- int ok = 0, work = 0;
raw_spin_lock(&desc->lock);
- /* Already running on another processor */
- if (desc->status & IRQ_INPROGRESS) {
- /*
- * Already running: If it is shared get the other
- * CPU to go looking for our mystery interrupt too
- */
- if (desc->action && (desc->action->flags & IRQF_SHARED))
- desc->status |= IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
- return ok;
- }
- /* Honour the normal IRQ locking */
- desc->status |= IRQ_INPROGRESS;
- action = desc->action;
- raw_spin_unlock(&desc->lock);
- while (action) {
- /* Only shared IRQ handlers are safe to call */
- if (action->flags & IRQF_SHARED) {
- if (action->handler(irq, action->dev_id) ==
- IRQ_HANDLED)
- ok = 1;
- }
- action = action->next;
- }
- local_irq_disable();
- /* Now clean up the flags */
- raw_spin_lock(&desc->lock);
- action = desc->action;
+ /* PER_CPU and nested thread interrupts are never polled */
+ if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
+ goto out;
/*
- * While we were looking for a fixup someone queued a real
- * IRQ clashing with our walk:
+ * Do not poll disabled interrupts unless the spurious
+ * disabled poller asks explicitely.
*/
- while ((desc->status & IRQ_PENDING) && action) {
+ if ((desc->istate & IRQS_DISABLED) && !force)
+ goto out;
+
+ /*
+ * All handlers must agree on IRQF_SHARED, so we test just the
+ * first. Check for action->next as well.
+ */
+ action = desc->action;
+ if (!action || !(action->flags & IRQF_SHARED) ||
+ (action->flags & __IRQF_TIMER) || !action->next)
+ goto out;
+
+ /* Already running on another processor */
+ if (desc->istate & IRQS_INPROGRESS) {
/*
- * Perform real IRQ processing for the IRQ we deferred
+ * Already running: If it is shared get the other
+ * CPU to go looking for our mystery interrupt too
*/
- work = 1;
- raw_spin_unlock(&desc->lock);
- handle_IRQ_event(irq, action);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_PENDING;
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ goto out;
}
- desc->status &= ~IRQ_INPROGRESS;
- /*
- * If we did actual work for the real IRQ line we must let the
- * IRQ controller clean up too
- */
- if (work)
- irq_end(irq, desc);
- raw_spin_unlock(&desc->lock);
- return ok;
+ /* Mark it poll in progress */
+ desc->istate |= IRQS_POLL_INPROGRESS;
+ do {
+ if (handle_irq_event(desc) == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ action = desc->action;
+ } while ((desc->istate & IRQS_PENDING) && action);
+ desc->istate &= ~IRQS_POLL_INPROGRESS;
+out:
+ raw_spin_unlock(&desc->lock);
+ return ret == IRQ_HANDLED;
}
static int misrouted_irq(int irq)
@@ -92,6 +116,11 @@ static int misrouted_irq(int irq)
struct irq_desc *desc;
int i, ok = 0;
+ if (atomic_inc_return(&irq_poll_active) == 1)
+ goto out;
+
+ irq_poll_cpu = smp_processor_id();
+
for_each_irq_desc(i, desc) {
if (!i)
continue;
@@ -99,9 +128,11 @@ static int misrouted_irq(int irq)
if (i == irq) /* Already tried */
continue;
- if (try_one_irq(i, desc))
+ if (try_one_irq(i, desc, false))
ok = 1;
}
+out:
+ atomic_dec(&irq_poll_active);
/* So the caller can adjust the irq error counts */
return ok;
}
@@ -111,23 +142,28 @@ static void poll_spurious_irqs(unsigned long dummy)
struct irq_desc *desc;
int i;
+ if (atomic_inc_return(&irq_poll_active) != 1)
+ goto out;
+ irq_poll_cpu = smp_processor_id();
+
for_each_irq_desc(i, desc) {
- unsigned int status;
+ unsigned int state;
if (!i)
continue;
/* Racy but it doesn't matter */
- status = desc->status;
+ state = desc->istate;
barrier();
- if (!(status & IRQ_SPURIOUS_DISABLED))
+ if (!(state & IRQS_SPURIOUS_DISABLED))
continue;
local_irq_disable();
- try_one_irq(i, desc);
+ try_one_irq(i, desc, true);
local_irq_enable();
}
-
+out:
+ atomic_dec(&irq_poll_active);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
@@ -139,15 +175,13 @@ static void poll_spurious_irqs(unsigned long dummy)
*
* (The other 100-of-100,000 interrupts may have been a correctly
* functioning device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
*/
-
static void
__report_bad_irq(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
struct irqaction *action;
+ unsigned long flags;
if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
printk(KERN_ERR "irq event %d: bogus return value %x\n",
@@ -159,6 +193,13 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
dump_stack();
printk(KERN_ERR "handlers:\n");
+ /*
+ * We need to take desc->lock here. note_interrupt() is called
+ * w/o desc->lock held, but IRQ_PROGRESS set. We might race
+ * with something else removing an action. It's ok to take
+ * desc->lock here. See synchronize_irq().
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
while (action) {
printk(KERN_ERR "[<%p>]", action->handler);
@@ -167,6 +208,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
printk("\n");
action = action->next;
}
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
static void
@@ -218,6 +260,9 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
+ if (desc->istate & IRQS_POLL_INPROGRESS)
+ return;
+
if (unlikely(action_ret != IRQ_HANDLED)) {
/*
* If we are seeing only the odd spurious IRQ caused by
@@ -254,9 +299,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
- desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
+ desc->istate |= IRQS_SPURIOUS_DISABLED;
desc->depth++;
- desc->irq_data.chip->irq_disable(&desc->irq_data);
+ irq_disable(desc);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb7173850..67fea9d25d5 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p)
return p->utime;
}
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
int error = check_clock(which_clock);
if (!error) {
@@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
return error;
}
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+static int
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
{
/*
* You can never reset a CPU clock, but we check for other errors
@@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
}
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
+static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
const pid_t pid = CPUCLOCK_PID(which_clock);
int error = -EINVAL;
@@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
* new timer already all-zeros initialized.
*/
-int posix_cpu_timer_create(struct k_itimer *new_timer)
+static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
int ret = 0;
const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_del(struct k_itimer *timer)
+static int posix_cpu_timer_del(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
int ret = 0;
@@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old)
+static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+ struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
union cpu_time_count old_expires, new_expires, old_incr, val;
@@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
return ret;
}
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
union cpu_time_count now;
struct task_struct *p = timer->it.cpu.task;
@@ -1481,11 +1483,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
return error;
}
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
+
+static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
+ struct timespec *rqtp, struct timespec __user *rmtp)
{
struct restart_block *restart_block =
- &current_thread_info()->restart_block;
+ &current_thread_info()->restart_block;
struct itimerspec it;
int error;
@@ -1501,56 +1505,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
if (error == -ERESTART_RESTARTBLOCK) {
- if (flags & TIMER_ABSTIME)
+ if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = rqtp->tv_sec;
- restart_block->arg3 = rqtp->tv_nsec;
+ restart_block->nanosleep.index = which_clock;
+ restart_block->nanosleep.rmtp = rmtp;
+ restart_block->nanosleep.expires = timespec_to_ns(rqtp);
}
return error;
}
-long posix_cpu_nsleep_restart(struct restart_block *restart_block)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
- struct timespec __user *rmtp;
+ clockid_t which_clock = restart_block->nanosleep.index;
struct timespec t;
struct itimerspec it;
int error;
- rmtp = (struct timespec __user *) restart_block->arg1;
- t.tv_sec = restart_block->arg2;
- t.tv_nsec = restart_block->arg3;
+ t = ns_to_timespec(restart_block->nanosleep.expires);
- restart_block->fn = do_no_restart_syscall;
error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
if (error == -ERESTART_RESTARTBLOCK) {
+ struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
- restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = t.tv_sec;
- restart_block->arg3 = t.tv_nsec;
+ restart_block->nanosleep.expires = timespec_to_ns(&t);
}
return error;
}
-
#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
@@ -1594,38 +1589,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
timer->it_clock = THREAD_CLOCK;
return posix_cpu_timer_create(timer);
}
-static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
-{
- return -EINVAL;
-}
-static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
-{
- return -EINVAL;
-}
+
+struct k_clock clock_posix_cpu = {
+ .clock_getres = posix_cpu_clock_getres,
+ .clock_set = posix_cpu_clock_set,
+ .clock_get = posix_cpu_clock_get,
+ .timer_create = posix_cpu_timer_create,
+ .nsleep = posix_cpu_nsleep,
+ .nsleep_restart = posix_cpu_nsleep_restart,
+ .timer_set = posix_cpu_timer_set,
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+};
static __init int init_posix_cpu_timers(void)
{
struct k_clock process = {
- .clock_getres = process_cpu_clock_getres,
- .clock_get = process_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = process_cpu_timer_create,
- .nsleep = process_cpu_nsleep,
- .nsleep_restart = process_cpu_nsleep_restart,
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get = process_cpu_clock_get,
+ .timer_create = process_cpu_timer_create,
+ .nsleep = process_cpu_nsleep,
+ .nsleep_restart = process_cpu_nsleep_restart,
};
struct k_clock thread = {
- .clock_getres = thread_cpu_clock_getres,
- .clock_get = thread_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = thread_cpu_timer_create,
- .nsleep = thread_cpu_nsleep,
- .nsleep_restart = thread_cpu_nsleep_restart,
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get = thread_cpu_clock_get,
+ .timer_create = thread_cpu_timer_create,
};
struct timespec ts;
- register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
- register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+ posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+ posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
cputime_to_timespec(cputime_one_jiffy, &ts);
onecputick = ts.tv_nsec;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb2bc5..4c0124919f9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/idr.h>
+#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
@@ -81,6 +82,14 @@ static DEFINE_SPINLOCK(idr_lock);
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
+/*
+ * parisc wants ENOTSUP instead of EOPNOTSUPP
+ */
+#ifndef ENOTSUP
+# define ENANOSLEEP_NOTSUP EOPNOTSUPP
+#else
+# define ENANOSLEEP_NOTSUP ENOTSUP
+#endif
/*
* The timer ID is turned into a timer address by idr_find().
@@ -94,11 +103,7 @@ static DEFINE_SPINLOCK(idr_lock);
/*
* CLOCKs: The POSIX standard calls for a couple of clocks and allows us
* to implement others. This structure defines the various
- * clocks and allows the possibility of adding others. We
- * provide an interface to add clocks to the table and expect
- * the "arch" code to add at least one clock that is high
- * resolution. Here we define the standard CLOCK_REALTIME as a
- * 1/HZ resolution clock.
+ * clocks.
*
* RESOLUTION: Clock resolution is used to round up timer and interval
* times, NOT to report clock times, which are reported with as
@@ -108,20 +113,13 @@ static DEFINE_SPINLOCK(idr_lock);
* necessary code is written. The standard says we should say
* something about this issue in the documentation...
*
- * FUNCTIONS: The CLOCKs structure defines possible functions to handle
- * various clock functions. For clocks that use the standard
- * system timer code these entries should be NULL. This will
- * allow dispatch without the overhead of indirect function
- * calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
- * must supply functions here, even if the function just returns
- * ENOSYS. The standard POSIX timer management code assumes the
- * following: 1.) The k_itimer struct (sched.h) is used for the
- * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
- * fields are not modified by timer code.
+ * FUNCTIONS: The CLOCKs structure defines possible functions to
+ * handle various clock functions.
*
- * At this time all functions EXCEPT clock_nanosleep can be
- * redirected by the CLOCKS structure. Clock_nanosleep is in
- * there, but the code ignores it.
+ * The standard POSIX timer management code assumes the
+ * following: 1.) The k_itimer struct (sched.h) is used for
+ * the timer. 2.) The list, it_lock, it_clock, it_id and
+ * it_pid fields are not modified by timer code.
*
* Permissions: It is assumed that the clock_settime() function defined
* for each clock will take care of permission checks. Some
@@ -138,6 +136,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
*/
static int common_nsleep(const clockid_t, int flags, struct timespec *t,
struct timespec __user *rmtp);
+static int common_timer_create(struct k_itimer *new_timer);
static void common_timer_get(struct k_itimer *, struct itimerspec *);
static int common_timer_set(struct k_itimer *, int,
struct itimerspec *, struct itimerspec *);
@@ -158,76 +157,24 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
spin_unlock_irqrestore(&timr->it_lock, flags);
}
-/*
- * Call the k_clock hook function if non-null, or the default function.
- */
-#define CLOCK_DISPATCH(clock, call, arglist) \
- ((clock) < 0 ? posix_cpu_##call arglist : \
- (posix_clocks[clock].call != NULL \
- ? (*posix_clocks[clock].call) arglist : common_##call arglist))
-
-/*
- * Default clock hook functions when the struct k_clock passed
- * to register_posix_clock leaves a function pointer null.
- *
- * The function common_CALL is the default implementation for
- * the function pointer CALL in struct k_clock.
- */
-
-static inline int common_clock_getres(const clockid_t which_clock,
- struct timespec *tp)
-{
- tp->tv_sec = 0;
- tp->tv_nsec = posix_clocks[which_clock].res;
- return 0;
-}
-
-/*
- * Get real time for posix timers
- */
-static int common_clock_get(clockid_t which_clock, struct timespec *tp)
+/* Get clock_realtime */
+static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
{
ktime_get_real_ts(tp);
return 0;
}
-static inline int common_clock_set(const clockid_t which_clock,
- struct timespec *tp)
+/* Set clock_realtime */
+static int posix_clock_realtime_set(const clockid_t which_clock,
+ const struct timespec *tp)
{
return do_sys_settimeofday(tp, NULL);
}
-static int common_timer_create(struct k_itimer *new_timer)
-{
- hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
- return 0;
-}
-
-static int no_timer_create(struct k_itimer *new_timer)
-{
- return -EOPNOTSUPP;
-}
-
-static int no_nsleep(const clockid_t which_clock, int flags,
- struct timespec *tsave, struct timespec __user *rmtp)
-{
- return -EOPNOTSUPP;
-}
-
-/*
- * Return nonzero if we know a priori this clockid_t value is bogus.
- */
-static inline int invalid_clockid(const clockid_t which_clock)
+static int posix_clock_realtime_adj(const clockid_t which_clock,
+ struct timex *t)
{
- if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
- return 0;
- if ((unsigned) which_clock >= MAX_CLOCKS)
- return 1;
- if (posix_clocks[which_clock].clock_getres != NULL)
- return 0;
- if (posix_clocks[which_clock].res != 0)
- return 0;
- return 1;
+ return do_adjtimex(t);
}
/*
@@ -240,7 +187,7 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
}
/*
- * Get monotonic time for posix timers
+ * Get monotonic-raw time for posix timers
*/
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
{
@@ -267,46 +214,70 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;
}
+
+static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
+{
+ get_monotonic_boottime(tp);
+ return 0;
+}
+
+
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
*/
static __init int init_posix_timers(void)
{
struct k_clock clock_realtime = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_clock_realtime_get,
+ .clock_set = posix_clock_realtime_set,
+ .clock_adj = posix_clock_realtime_adj,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_ktime_get_ts,
- .clock_set = do_posix_clock_nosettime,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_ktime_get_ts,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic_raw = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_get_monotonic_raw,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_monotonic_raw,
};
struct k_clock clock_realtime_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_realtime_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_realtime_coarse,
};
struct k_clock clock_monotonic_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_monotonic_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_monotonic_coarse,
+ };
+ struct k_clock clock_boottime = {
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_boottime,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
- register_posix_clock(CLOCK_REALTIME, &clock_realtime);
- register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
- register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
+ posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
+ posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -482,17 +453,29 @@ static struct pid *good_sigevent(sigevent_t * event)
return task_pid(rtn);
}
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
+void posix_timers_register_clock(const clockid_t clock_id,
+ struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk("POSIX clock register failed for clock_id %d\n",
+ printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ clock_id);
+ return;
+ }
+
+ if (!new_clock->clock_get) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ clock_id);
+ return;
+ }
+ if (!new_clock->clock_getres) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
posix_clocks[clock_id] = *new_clock;
}
-EXPORT_SYMBOL_GPL(register_posix_clock);
+EXPORT_SYMBOL_GPL(posix_timers_register_clock);
static struct k_itimer * alloc_posix_timer(void)
{
@@ -523,19 +506,39 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
kmem_cache_free(posix_timers_cache, tmr);
}
+static struct k_clock *clockid_to_kclock(const clockid_t id)
+{
+ if (id < 0)
+ return (id & CLOCKFD_MASK) == CLOCKFD ?
+ &clock_posix_dynamic : &clock_posix_cpu;
+
+ if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
+ return NULL;
+ return &posix_clocks[id];
+}
+
+static int common_timer_create(struct k_itimer *new_timer)
+{
+ hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
+ return 0;
+}
+
/* Create a POSIX.1b interval timer. */
SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
struct sigevent __user *, timer_event_spec,
timer_t __user *, created_timer_id)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct k_itimer *new_timer;
int error, new_timer_id;
sigevent_t event;
int it_id_set = IT_ID_NOT_SET;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->timer_create)
+ return -EOPNOTSUPP;
new_timer = alloc_posix_timer();
if (unlikely(!new_timer))
@@ -597,7 +600,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
goto out;
}
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ error = kc->timer_create(new_timer);
if (error)
goto out;
@@ -607,7 +610,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
spin_unlock_irq(&current->sighand->siglock);
return 0;
- /*
+ /*
* In the case of the timer belonging to another task, after
* the task is unlocked, the timer is owned by the other task
* and may cease to exist at any time. Don't use or modify
@@ -709,22 +712,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
{
- struct k_itimer *timr;
struct itimerspec cur_setting;
+ struct k_itimer *timr;
+ struct k_clock *kc;
unsigned long flags;
+ int ret = 0;
timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
- CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_get))
+ ret = -EINVAL;
+ else
+ kc->timer_get(timr, &cur_setting);
unlock_timer(timr, flags);
- if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
return -EFAULT;
- return 0;
+ return ret;
}
/*
@@ -813,6 +822,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
int error = 0;
unsigned long flag;
struct itimerspec *rtn = old_setting ? &old_spec : NULL;
+ struct k_clock *kc;
if (!new_setting)
return -EINVAL;
@@ -828,8 +838,11 @@ retry:
if (!timr)
return -EINVAL;
- error = CLOCK_DISPATCH(timr->it_clock, timer_set,
- (timr, flags, &new_spec, rtn));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_set))
+ error = -EINVAL;
+ else
+ error = kc->timer_set(timr, flags, &new_spec, rtn);
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
@@ -844,7 +857,7 @@ retry:
return error;
}
-static inline int common_timer_del(struct k_itimer *timer)
+static int common_timer_del(struct k_itimer *timer)
{
timer->it.real.interval.tv64 = 0;
@@ -855,7 +868,11 @@ static inline int common_timer_del(struct k_itimer *timer)
static inline int timer_delete_hook(struct k_itimer *timer)
{
- return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer));
+ struct k_clock *kc = clockid_to_kclock(timer->it_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->timer_del))
+ return -EINVAL;
+ return kc->timer_del(timer);
}
/* Delete a POSIX.1b interval timer. */
@@ -927,69 +944,76 @@ void exit_itimers(struct signal_struct *sig)
}
}
-/* Not available / possible... functions */
-int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
-
-int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
- struct timespec *t, struct timespec __user *r)
-{
-#ifndef ENOTSUP
- return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
-#else /* parisc does define it separately. */
- return -ENOTSUP;
-#endif
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
-
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
const struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec new_tp;
- if (invalid_clockid(which_clock))
+ if (!kc || !kc->clock_set)
return -EINVAL;
+
if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
- return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
+ return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
struct timespec __user *,tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_get,
- (which_clock, &kernel_tp));
+
+ error = kc->clock_get(which_clock, &kernel_tp);
+
if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
error = -EFAULT;
return error;
+}
+
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+ struct timex __user *, utx)
+{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+ struct timex ktx;
+ int err;
+
+ if (!kc)
+ return -EINVAL;
+ if (!kc->clock_adj)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&ktx, utx, sizeof(ktx)))
+ return -EFAULT;
+
+ err = kc->clock_adj(which_clock, &ktx);
+
+ if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
+ return -EFAULT;
+ return err;
}
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec rtn_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_getres,
- (which_clock, &rtn_tp));
+ error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) {
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
error = -EFAULT;
- }
return error;
}
@@ -1009,10 +1033,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
const struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec t;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->nsleep)
+ return -ENANOSLEEP_NOTSUP;
if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
return -EFAULT;
@@ -1020,27 +1047,20 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!timespec_valid(&t))
return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep,
- (which_clock, flags, &t, rmtp));
-}
-
-/*
- * nanosleep_restart for monotonic and realtime clocks
- */
-static int common_nsleep_restart(struct restart_block *restart_block)
-{
- return hrtimer_nanosleep_restart(restart_block);
+ return kc->nsleep(which_clock, flags, &t, rmtp);
}
/*
* This will restart clock_nanosleep. This is required only by
* compat_clock_nanosleep_restart for now.
*/
-long
-clock_nanosleep_restart(struct restart_block *restart_block)
+long clock_nanosleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
+ clockid_t which_clock = restart_block->nanosleep.index;
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
+ return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep_restart,
- (restart_block));
+ return kc->nsleep_restart(restart_block);
}
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c..3c7cbc2c33b 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
put_pid(waiter->deadlock_task_pid);
TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
- TRACE_WARN_ON(waiter->task);
memset(waiter, 0x22, sizeof(*waiter));
}
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef..5c9ccd38096 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/sysdev.h>
#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
- int bkl;
int event;
struct sys_device sysdev;
};
@@ -46,9 +44,8 @@ enum test_opcodes {
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
- RTTEST_LOCKBKL, /* 9 Lock BKL */
- RTTEST_UNLOCKBKL, /* 10 Unlock BKL */
- RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
+ /* 9, 10 - reserved for BKL commemoration */
+ RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
};
@@ -74,13 +71,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[i] = 0;
}
}
-
- if (!lockwakeup && td->bkl == 4) {
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- }
return 0;
case RTTEST_RESETEVENT:
@@ -131,25 +121,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[id] = 0;
return 0;
- case RTTEST_LOCKBKL:
- if (td->bkl)
- return 0;
- td->bkl = 1;
-#ifdef CONFIG_LOCK_KERNEL
- lock_kernel();
-#endif
- td->bkl = 4;
- return 0;
-
- case RTTEST_UNLOCKBKL:
- if (td->bkl != 4)
- break;
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- return 0;
-
default:
break;
}
@@ -196,7 +167,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
break;
- case RTTEST_LOCKBKL:
default:
break;
}
@@ -229,8 +199,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
return;
- case RTTEST_LOCKBKL:
- return;
default:
return;
}
@@ -380,11 +348,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
spin_lock(&rttest_lock);
curr += sprintf(curr,
- "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:",
+ "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
- tsk->pi_blocked_on, td->bkl);
+ tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786..ab449117aaf 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
/*
* lock->owner state tracking:
*
- * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1
- * are used to keep track of the "owner is pending" and "lock has
- * waiters" state.
+ * lock->owner holds the task_struct pointer of the owner. Bit 0
+ * is used to keep track of the "lock has waiters" state.
*
- * owner bit1 bit0
- * NULL 0 0 lock is free (fast acquire possible)
- * NULL 0 1 invalid state
- * NULL 1 0 Transitional State*
- * NULL 1 1 invalid state
- * taskpointer 0 0 lock is held (fast release possible)
- * taskpointer 0 1 task is pending owner
- * taskpointer 1 0 lock is held and has waiters
- * taskpointer 1 1 task is pending owner and lock has more waiters
- *
- * Pending ownership is assigned to the top (highest priority)
- * waiter of the lock, when the lock is released. The thread is woken
- * up and can now take the lock. Until the lock is taken (bit 0
- * cleared) a competing higher priority thread can steal the lock
- * which puts the woken up thread back on the waiters list.
+ * owner bit0
+ * NULL 0 lock is free (fast acquire possible)
+ * NULL 1 lock is free and has waiters and the top waiter
+ * is going to take the lock*
+ * taskpointer 0 lock is held (fast release possible)
+ * taskpointer 1 lock is held and has waiters**
*
* The fast atomic compare exchange based acquire and release is only
- * possible when bit 0 and 1 of lock->owner are 0.
+ * possible when bit 0 of lock->owner is 0.
+ *
+ * (*) It also can be a transitional state when grabbing the lock
+ * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
+ * we need to set the bit0 before looking at the lock, and the owner may be
+ * NULL in this small time, hence this can be a transitional state.
*
- * (*) There's a small time where the owner can be NULL and the
- * "lock has waiters" bit is set. This can happen when grabbing the lock.
- * To prevent a cmpxchg of the owner releasing the lock, we need to set this
- * bit before looking at the lock, hence the reason this is a transitional
- * state.
+ * (**) There is a small time when bit 0 is set but there are no
+ * waiters. This can happen when grabbing the lock in the slow path.
+ * To prevent a cmpxchg of the owner releasing the lock, we need to
+ * set this bit before looking at the lock.
*/
static void
-rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
- unsigned long mask)
+rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
- unsigned long val = (unsigned long)owner | mask;
+ unsigned long val = (unsigned long)owner;
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* reached or the state of the chain has changed while we
* dropped the locks.
*/
- if (!waiter || !waiter->task)
+ if (!waiter)
goto out_unlock_pi;
/*
* Check the orig_waiter state. After we dropped the locks,
- * the previous owner of the lock might have released the lock
- * and made us the pending owner:
+ * the previous owner of the lock might have released the lock.
*/
- if (orig_waiter && !orig_waiter->task)
+ if (orig_waiter && !rt_mutex_owner(orig_lock))
goto out_unlock_pi;
/*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!rt_mutex_owner(lock)) {
+ /*
+ * If the requeue above changed the top waiter, then we need
+ * to wake the new top waiter up to try to get the lock.
+ */
+
+ if (top_waiter != rt_mutex_top_waiter(lock))
+ wake_up_process(rt_mutex_top_waiter(lock)->task);
+ raw_spin_unlock(&lock->wait_lock);
+ goto out_put_task;
+ }
put_task_struct(task);
/* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
}
/*
- * Optimization: check if we can steal the lock from the
- * assigned pending owner [which might not have taken the
- * lock yet]:
- */
-static inline int try_to_steal_lock(struct rt_mutex *lock,
- struct task_struct *task)
-{
- struct task_struct *pendowner = rt_mutex_owner(lock);
- struct rt_mutex_waiter *next;
- unsigned long flags;
-
- if (!rt_mutex_owner_pending(lock))
- return 0;
-
- if (pendowner == task)
- return 1;
-
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
- if (task->prio >= pendowner->prio) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 0;
- }
-
- /*
- * Check if a waiter is enqueued on the pending owners
- * pi_waiters list. Remove it and readjust pending owners
- * priority.
- */
- if (likely(!rt_mutex_has_waiters(lock))) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 1;
- }
-
- /* No chain handling, pending owner is not blocked on anything: */
- next = rt_mutex_top_waiter(lock);
- plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
- __rt_mutex_adjust_prio(pendowner);
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- /*
- * We are going to steal the lock and a waiter was
- * enqueued on the pending owners pi_waiters queue. So
- * we have to enqueue this waiter into
- * task->pi_waiters list. This covers the case,
- * where task is boosted because it holds another
- * lock and gets unboosted because the booster is
- * interrupted, so we would delay a waiter with higher
- * priority as task->normal_prio.
- *
- * Note: in the rare case of a SCHED_OTHER task changing
- * its priority and thus stealing the lock, next->task
- * might be task:
- */
- if (likely(next->task != task)) {
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- plist_add(&next->pi_list_entry, &task->pi_waiters);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- }
- return 1;
-}
-
-/*
* Try to take an rt-mutex
*
- * This fails
- * - when the lock has a real owner
- * - when a different pending owner exists and has higher priority than current
- *
* Must be called with lock->wait_lock held.
+ *
+ * @lock: the lock to be acquired.
+ * @task: the task which wants to acquire the lock
+ * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock)
+static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
{
/*
* We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
*/
mark_rt_mutex_waiters(lock);
- if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
+ if (rt_mutex_owner(lock))
return 0;
+ /*
+ * It will get the lock because of one of these conditions:
+ * 1) there is no waiter
+ * 2) higher priority than waiters
+ * 3) it is top waiter
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
+ if (!waiter || waiter != rt_mutex_top_waiter(lock))
+ return 0;
+ }
+ }
+
+ if (waiter || rt_mutex_has_waiters(lock)) {
+ unsigned long flags;
+ struct rt_mutex_waiter *top;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ /* remove the queued waiter. */
+ if (waiter) {
+ plist_del(&waiter->list_entry, &lock->wait_list);
+ task->pi_blocked_on = NULL;
+ }
+
+ /*
+ * We have to enqueue the top waiter(if it exists) into
+ * task->pi_waiters list.
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ top = rt_mutex_top_waiter(lock);
+ top->pi_list_entry.prio = top->list_entry.prio;
+ plist_add(&top->pi_list_entry, &task->pi_waiters);
+ }
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ }
+
/* We got the lock. */
debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, current, 0);
+ rt_mutex_set_owner(lock, task);
- rt_mutex_deadlock_account_lock(lock, current);
+ rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!owner)
+ return 0;
+
if (waiter == rt_mutex_top_waiter(lock)) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
/*
* Wake up the next waiter on the lock.
*
- * Remove the top waiter from the current tasks waiter list and from
- * the lock waiter list. Set it as pending owner. Then wake it up.
+ * Remove the top waiter from the current tasks waiter list and wake it up.
*
* Called with lock->wait_lock held.
*/
static void wakeup_next_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
- struct task_struct *pendowner;
unsigned long flags;
raw_spin_lock_irqsave(&current->pi_lock, flags);
waiter = rt_mutex_top_waiter(lock);
- plist_del(&waiter->list_entry, &lock->wait_list);
/*
* Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* lock->wait_lock.
*/
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
- pendowner = waiter->task;
- waiter->task = NULL;
- rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
+ rt_mutex_set_owner(lock, NULL);
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- /*
- * Clear the pi_blocked_on variable and enqueue a possible
- * waiter into the pi_waiters list of the pending owner. This
- * prevents that in case the pending owner gets unboosted a
- * waiter with higher priority than pending-owner->normal_prio
- * is blocked on the unboosted (pending) owner.
- */
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
-
- WARN_ON(!pendowner->pi_blocked_on);
- WARN_ON(pendowner->pi_blocked_on != waiter);
- WARN_ON(pendowner->pi_blocked_on->lock != lock);
-
- pendowner->pi_blocked_on = NULL;
-
- if (rt_mutex_has_waiters(lock)) {
- struct rt_mutex_waiter *next;
-
- next = rt_mutex_top_waiter(lock);
- plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
- }
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- wake_up_process(pendowner);
+ wake_up_process(waiter->task);
}
/*
- * Remove a waiter from a lock
+ * Remove a waiter from a lock and give up
*
- * Must be called with lock->wait_lock held
+ * Must be called with lock->wait_lock held and
+ * have just failed to try_to_take_rt_mutex().
*/
static void remove_waiter(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
- waiter->task = NULL;
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- if (first && owner != current) {
+ if (!owner)
+ return;
+
+ if (first) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
- * @detect_deadlock: passed to task_blocks_on_rt_mutex
*
* lock->wait_lock must be held by the caller.
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter,
- int detect_deadlock)
+ struct rt_mutex_waiter *waiter)
{
int ret = 0;
for (;;) {
/* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock))
+ if (try_to_take_rt_mutex(lock, current, waiter))
break;
/*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- /*
- * waiter->task is NULL the first time we come here and
- * when we have been woken up by the previous owner
- * but the lock got stolen by a higher prio task.
- */
- if (!waiter->task) {
- ret = task_blocks_on_rt_mutex(lock, waiter, current,
- detect_deadlock);
- /*
- * If we got woken up by the owner then start loop
- * all over without going into schedule to try
- * to get the lock now:
- */
- if (unlikely(!waiter->task)) {
- /*
- * Reset the return value. We might
- * have returned with -EDEADLK and the
- * owner released the lock while we
- * were walking the pi chain.
- */
- ret = 0;
- continue;
- }
- if (unlikely(ret))
- break;
- }
-
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
- if (waiter->task)
- schedule_rt_mutex(lock);
+ schedule_rt_mutex(lock);
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
- waiter.task = NULL;
raw_spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock)) {
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
timeout->task = NULL;
}
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
- detect_deadlock);
+ ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
+
+ if (likely(!ret))
+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter.task))
+ if (unlikely(ret))
remove_waiter(lock, &waiter);
/*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
if (unlikely(timeout))
hrtimer_cancel(&timeout->timer);
- /*
- * Readjust priority, when we did not get the lock. We might
- * have been the pending owner and boosted. Since we did not
- * take the lock, the PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
debug_rt_mutex_free_waiter(&waiter);
return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
if (likely(rt_mutex_owner(lock) != current)) {
- ret = try_to_take_rt_mutex(lock);
+ ret = try_to_take_rt_mutex(lock, current, NULL);
/*
* try_to_take_rt_mutex() sets the lock waiters
* bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
{
__rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
- rt_mutex_set_owner(lock, proxy_owner, 0);
+ rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL, 0);
+ rt_mutex_set_owner(lock, NULL);
rt_mutex_deadlock_account_unlock(proxy_owner);
}
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
raw_spin_lock(&lock->wait_lock);
- mark_rt_mutex_waiters(lock);
-
- if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
- /* We got the lock for task. */
- debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, task, 0);
+ if (try_to_take_rt_mutex(lock, task, NULL)) {
raw_spin_unlock(&lock->wait_lock);
- rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
- if (ret && !waiter->task) {
+ if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
*/
ret = 0;
}
+
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
+
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
- detect_deadlock);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter->task))
+ if (unlikely(ret))
remove_waiter(lock, waiter);
/*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
raw_spin_unlock(&lock->wait_lock);
- /*
- * Readjust priority, when we did not get the lock. We might have been
- * the pending owner and boosted. Since we did not take the lock, the
- * PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
return ret;
}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866a..53a66c85261 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
/*
* lock->owner state tracking:
*/
-#define RT_MUTEX_OWNER_PENDING 1UL
-#define RT_MUTEX_HAS_WAITERS 2UL
-#define RT_MUTEX_OWNER_MASKALL 3UL
+#define RT_MUTEX_HAS_WAITERS 1UL
+#define RT_MUTEX_OWNER_MASKALL 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
}
-static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
-{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
-static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
-{
- return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
-}
-
/*
* PI-futex support (proxy locking functions, etc.):
*/
diff --git a/kernel/sched.c b/kernel/sched.c
index 79e611cd83d..c8e40b7005c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr, *next, *last;
+ struct sched_entity *curr, *next, *last, *skip;
unsigned int nr_spread_over;
@@ -1683,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__release(rq2->lock);
}
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ __acquires(rq1->lock)
+ __acquires(rq2->lock)
+{
+ BUG_ON(!irqs_disabled());
+ BUG_ON(rq1 != rq2);
+ raw_spin_lock(&rq1->lock);
+ __acquire(rq2->lock); /* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ __releases(rq1->lock)
+ __releases(rq2->lock)
+{
+ BUG_ON(rq1 != rq2);
+ raw_spin_unlock(&rq1->lock);
+ __release(rq2->lock);
+}
+
#endif
static void calc_load_account_idle(struct rq *this_rq);
@@ -1877,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
*/
if (hardirq_count())
__this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
irq_time_write_end();
@@ -1917,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
sched_rt_avg_update(rq, irq_delta);
}
+static int irqtime_account_hi_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_hardirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int irqtime_account_si_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_softirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+#define sched_clock_irqtime (0)
+
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
rq->clock_task += delta;
@@ -2022,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
- int oldprio, int running)
+ int oldprio)
{
if (prev_class != p->sched_class) {
if (prev_class->switched_from)
- prev_class->switched_from(rq, p, running);
- p->sched_class->switched_to(rq, p, running);
- } else
- p->sched_class->prio_changed(rq, p, oldprio, running);
+ prev_class->switched_from(rq, p);
+ p->sched_class->switched_to(rq, p);
+ } else if (oldprio != p->prio)
+ p->sched_class->prio_changed(rq, p, oldprio);
}
static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2221,7 +2286,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* yield - it could be a while.
*/
if (unlikely(on_rq)) {
- schedule_timeout_uninterruptible(1);
+ ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
continue;
}
@@ -2542,6 +2610,7 @@ static void __sched_fork(struct task_struct *p)
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
+ p->se.vruntime = 0;
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3547,6 +3616,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
}
/*
+ * Account system cpu time to a process and desired cpustat field
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
+ * @target_cputime64: pointer to cpustat field that has to be updated
+ */
+static inline
+void __account_system_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled, cputime64_t *target_cputime64)
+{
+ cputime64_t tmp = cputime_to_cputime64(cputime);
+
+ /* Add system time to process. */
+ p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+ account_group_system_time(p, cputime);
+
+ /* Add system time to cpustat. */
+ *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
+ /* Account for system time used */
+ acct_update_integrals(p);
+}
+
+/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3557,36 +3652,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- cputime64_t tmp;
+ cputime64_t *target_cputime64;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
return;
}
- /* Add system time to process. */
- p->stime = cputime_add(p->stime, cputime);
- p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
- account_group_system_time(p, cputime);
-
- /* Add system time to cpustat. */
- tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
- cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ target_cputime64 = &cpustat->irq;
else if (in_serving_softirq())
- cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ target_cputime64 = &cpustat->softirq;
else
- cpustat->system = cputime64_add(cpustat->system, tmp);
+ target_cputime64 = &cpustat->system;
- cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
-
- /* Account for system time used */
- acct_update_integrals(p);
+ __account_system_time(p, cputime, cputime_scaled, target_cputime64);
}
/*
* Account for involuntary wait time.
- * @steal: the cpu time spent in involuntary wait
+ * @cputime: the cpu time spent in involuntary wait
*/
void account_steal_time(cputime_t cputime)
{
@@ -3614,6 +3699,73 @@ void account_idle_time(cputime_t cputime)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * Account a tick to a process and cpustat
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: is the tick from userspace
+ * @rq: the pointer to rq
+ *
+ * Tick demultiplexing follows the order
+ * - pending hardirq update
+ * - pending softirq update
+ * - user_time
+ * - idle_time
+ * - system time
+ * - check for guest_time
+ * - else account as system_time
+ *
+ * Check for hardirq is done both for system and user time as there is
+ * no timer going off while we are on hardirq and hence we may never get an
+ * opportunity to update it solely in system time.
+ * p->stime and friends are only updated on system time and not on irq
+ * softirq as those do not count in task exec_runtime any more.
+ */
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq)
+{
+ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+ if (irqtime_account_hi_update()) {
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ } else if (irqtime_account_si_update()) {
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ } else if (this_cpu_ksoftirqd() == p) {
+ /*
+ * ksoftirqd time do not get accounted in cpu_softirq_time.
+ * So, we have to handle it separately here.
+ * Also, p->stime needs to be updated for ksoftirqd.
+ */
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->softirq);
+ } else if (user_tick) {
+ account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else if (p == rq->idle) {
+ account_idle_time(cputime_one_jiffy);
+ } else if (p->flags & PF_VCPU) { /* System time or guest time */
+ account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else {
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->system);
+ }
+}
+
+static void irqtime_account_idle_ticks(int ticks)
+{
+ int i;
+ struct rq *rq = this_rq();
+
+ for (i = 0; i < ticks; i++)
+ irqtime_account_process_tick(current, 0, rq);
+}
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+static void irqtime_account_idle_ticks(int ticks) {}
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq) {}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
@@ -3624,6 +3776,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (sched_clock_irqtime) {
+ irqtime_account_process_tick(p, user_tick, rq);
+ return;
+ }
+
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3649,6 +3806,12 @@ void account_steal_ticks(unsigned long ticks)
*/
void account_idle_ticks(unsigned long ticks)
{
+
+ if (sched_clock_irqtime) {
+ irqtime_account_idle_ticks(ticks);
+ return;
+ }
+
account_idle_time(jiffies_to_cputime(ticks));
}
@@ -4189,6 +4352,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
{
__wake_up_common(q, mode, 1, 0, key);
}
+EXPORT_SYMBOL_GPL(__wake_up_locked_key);
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4546,11 +4710,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, &flags);
}
@@ -4798,12 +4961,15 @@ recheck:
param->sched_priority > rlim_rtprio)
return -EPERM;
}
+
/*
- * Like positive nice levels, dont allow tasks to
- * move out of SCHED_IDLE either:
+ * Treat SCHED_IDLE as nice 20. Only allow a switch to
+ * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
*/
- if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
- return -EPERM;
+ if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
+ if (!can_nice(p, TASK_NICE(p)))
+ return -EPERM;
+ }
/* can't change other user's priorities */
if (!check_same_owner(p))
@@ -4878,11 +5044,10 @@ recheck:
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
activate_task(rq, p, 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -5299,6 +5464,65 @@ void __sched yield(void)
}
EXPORT_SYMBOL(yield);
+/**
+ * yield_to - yield the current processor to another thread in
+ * your thread group, or accelerate that thread toward the
+ * processor it's on.
+ *
+ * It's the caller's job to ensure that the target task struct
+ * can't go away on us before we can do any checks.
+ *
+ * Returns true if we indeed boosted the target task.
+ */
+bool __sched yield_to(struct task_struct *p, bool preempt)
+{
+ struct task_struct *curr = current;
+ struct rq *rq, *p_rq;
+ unsigned long flags;
+ bool yielded = 0;
+
+ local_irq_save(flags);
+ rq = this_rq();
+
+again:
+ p_rq = task_rq(p);
+ double_rq_lock(rq, p_rq);
+ while (task_rq(p) != p_rq) {
+ double_rq_unlock(rq, p_rq);
+ goto again;
+ }
+
+ if (!curr->sched_class->yield_to_task)
+ goto out;
+
+ if (curr->sched_class != p->sched_class)
+ goto out;
+
+ if (task_running(p_rq, p) || p->state)
+ goto out;
+
+ yielded = curr->sched_class->yield_to_task(rq, p, preempt);
+ if (yielded) {
+ schedstat_inc(rq, yld_count);
+ /*
+ * Make p's CPU reschedule; pick_next_entity takes care of
+ * fairness.
+ */
+ if (preempt && rq != p_rq)
+ resched_task(p_rq->curr);
+ }
+
+out:
+ double_rq_unlock(rq, p_rq);
+ local_irq_restore(flags);
+
+ if (yielded)
+ schedule();
+
+ return yielded;
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
/*
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
@@ -7772,6 +7996,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
INIT_LIST_HEAD(&cfs_rq->tasks);
#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq->rq = rq;
+ /* allow initial update_cfs_load() to truncate */
+#ifdef CONFIG_SMP
+ cfs_rq->load_stamp = 1;
+#endif
#endif
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
}
@@ -8085,6 +8313,8 @@ EXPORT_SYMBOL(__might_sleep);
#ifdef CONFIG_MAGIC_SYSRQ
static void normalize_task(struct rq *rq, struct task_struct *p)
{
+ const struct sched_class *prev_class = p->sched_class;
+ int old_prio = p->prio;
int on_rq;
on_rq = p->se.on_rq;
@@ -8095,6 +8325,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
activate_task(rq, p, 0);
resched_task(rq->curr);
}
+
+ check_class_changed(rq, p, prev_class, old_prio);
}
void normalize_rt_tasks(void)
@@ -8486,7 +8718,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
/* Propagate contribution to hierarchy */
raw_spin_lock_irqsave(&rq->lock, flags);
for_each_sched_entity(se)
- update_cfs_shares(group_cfs_rq(se), 0);
+ update_cfs_shares(group_cfs_rq(se));
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 9fb65628315..5946ac51560 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr;
static void __init autogroup_init(struct task_struct *init_task)
{
autogroup_default.tg = &root_task_group;
- root_task_group.autogroup = &autogroup_default;
kref_init(&autogroup_default.kref);
init_rwsem(&autogroup_default.lock);
init_task->signal->autogroup = &autogroup_default;
@@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
static inline bool task_group_is_autogroup(struct task_group *tg)
{
- return tg != &root_task_group && tg->autogroup;
+ return !!tg->autogroup;
}
static inline struct task_group *
@@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
p->signal->autogroup = autogroup_kref_get(ag);
+ if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+ goto out;
+
t = p;
do {
sched_move_task(t);
} while_each_thread(p, t);
+out:
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
@@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
struct autogroup *ag = autogroup_task_get(p);
+ if (!task_group_is_autogroup(ag->tg))
+ goto out;
+
down_read(&ag->lock);
seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
up_read(&ag->lock);
+out:
autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */
@@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
- int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
- if (!enabled || !tg->autogroup)
+ if (!task_group_is_autogroup(tg))
return 0;
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 7b859ffe5da..05577055cfc 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,6 +1,11 @@
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
+ /*
+ * reference doesn't mean how many thread attach to this
+ * autogroup now. It just stands for the number of task
+ * could use this autogroup.
+ */
struct kref kref;
struct task_group *tg;
struct rw_semaphore lock;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd07..7bacd83a415 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
raw_spin_lock_irqsave(&rq->lock, flags);
if (cfs_rq->rb_leftmost)
- MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
+ MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450..3f7ec9e27ee 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
unsigned int sysctl_sched_child_runs_first __read_mostly;
/*
- * sys_sched_yield() compat mode
- *
- * This option switches the agressive yield implementation of the
- * old scheduler back on.
- */
-unsigned int __read_mostly sysctl_sched_compat_yield;
-
-/*
* SCHED_OTHER wake-up granularity.
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}
-static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *left = cfs_rq->rb_leftmost;
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
return rb_entry(left, struct sched_entity, run_node);
}
+static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+{
+ struct rb_node *next = rb_next(&se->run_node);
+
+ if (!next)
+ return NULL;
+
+ return rb_entry(next, struct sched_entity, run_node);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
* Scheduling class statistics methods:
*/
-#ifdef CONFIG_SCHED_DEBUG
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
+static void update_cfs_shares(struct cfs_rq *cfs_rq);
/*
* Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
now - cfs_rq->load_last > 4 * period) {
cfs_rq->load_period = 0;
cfs_rq->load_avg = 0;
+ delta = period - 1;
}
cfs_rq->load_stamp = now;
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
list_del_leaf_cfs_rq(cfs_rq);
}
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
long load_weight, load, shares;
- load = cfs_rq->load.weight + weight_delta;
+ load = cfs_rq->load.weight;
load_weight = atomic_read(&tg->load_weight);
- load_weight -= cfs_rq->load_contribution;
load_weight += load;
+ load_weight -= cfs_rq->load_contribution;
shares = (tg->shares * load);
if (load_weight)
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
}
# else /* CONFIG_SMP */
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
return tg->shares;
}
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
account_entity_enqueue(cfs_rq, se);
}
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static void update_cfs_shares(struct cfs_rq *cfs_rq)
{
struct task_group *tg;
struct sched_entity *se;
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
if (likely(se->load.weight == tg->shares))
return;
#endif
- shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
+ shares = calc_cfs_shares(cfs_rq, tg);
reweight_entity(cfs_rq_of(se), se, shares);
}
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
{
}
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, se->load.weight);
account_entity_enqueue(cfs_rq, se);
+ update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
list_add_leaf_cfs_rq(cfs_rq);
}
-static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __clear_buddies_last(struct sched_entity *se)
+{
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->last == se)
+ cfs_rq->last = NULL;
+ else
+ break;
+ }
+}
+
+static void __clear_buddies_next(struct sched_entity *se)
{
- if (!se || cfs_rq->last == se)
- cfs_rq->last = NULL;
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->next == se)
+ cfs_rq->next = NULL;
+ else
+ break;
+ }
+}
- if (!se || cfs_rq->next == se)
- cfs_rq->next = NULL;
+static void __clear_buddies_skip(struct sched_entity *se)
+{
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->skip == se)
+ cfs_rq->skip = NULL;
+ else
+ break;
+ }
}
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- for_each_sched_entity(se)
- __clear_buddies(cfs_rq_of(se), se);
+ if (cfs_rq->last == se)
+ __clear_buddies_last(se);
+
+ if (cfs_rq->next == se)
+ __clear_buddies_next(se);
+
+ if (cfs_rq->skip == se)
+ __clear_buddies_skip(se);
}
static void
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
/*
* Normalize the entity after updating the min_vruntime because the
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (cfs_rq->nr_running > 1) {
- struct sched_entity *se = __pick_next_entity(cfs_rq);
+ struct sched_entity *se = __pick_first_entity(cfs_rq);
s64 delta = curr->vruntime - se->vruntime;
if (delta < 0)
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+/*
+ * Pick the next process, keeping these things in mind, in this order:
+ * 1) keep things fair between processes/task groups
+ * 2) pick the "next" process, since someone really wants that to run
+ * 3) pick the "last" process, for cache locality
+ * 4) do not run the "skip" process, if something else is available
+ */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
- struct sched_entity *se = __pick_next_entity(cfs_rq);
+ struct sched_entity *se = __pick_first_entity(cfs_rq);
struct sched_entity *left = se;
- if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
- se = cfs_rq->next;
+ /*
+ * Avoid running the skip buddy, if running something else can
+ * be done without getting too unfair.
+ */
+ if (cfs_rq->skip == se) {
+ struct sched_entity *second = __pick_next_entity(se);
+ if (second && wakeup_preempt_entity(second, left) < 1)
+ se = second;
+ }
/*
* Prefer last buddy, try to return the CPU to a preempted task.
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
se = cfs_rq->last;
+ /*
+ * Someone really wants this to run. If it's not unfair, run it.
+ */
+ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+ se = cfs_rq->next;
+
clear_buddies(cfs_rq, se);
return se;
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
}
-/*
- * sched_yield() support is very simple - we dequeue and enqueue.
- *
- * If compat_yield is turned on then we requeue to the end of the tree.
- */
-static void yield_task_fair(struct rq *rq)
-{
- struct task_struct *curr = rq->curr;
- struct cfs_rq *cfs_rq = task_cfs_rq(curr);
- struct sched_entity *rightmost, *se = &curr->se;
-
- /*
- * Are we the only task in the tree?
- */
- if (unlikely(cfs_rq->nr_running == 1))
- return;
-
- clear_buddies(cfs_rq, se);
-
- if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
- update_rq_clock(rq);
- /*
- * Update run-time statistics of the 'current'.
- */
- update_curr(cfs_rq);
-
- return;
- }
- /*
- * Find the rightmost entry in the rbtree:
- */
- rightmost = __pick_last_entity(cfs_rq);
- /*
- * Already in the rightmost position?
- */
- if (unlikely(!rightmost || entity_before(rightmost, se)))
- return;
-
- /*
- * Minimally necessary key value to be last in the tree:
- * Upon rescheduling, sched_class::put_prev_task() will place
- * 'current' within the tree based on its new key value.
- */
- se->vruntime = rightmost->vruntime + 1;
-}
-
#ifdef CONFIG_SMP
static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
}
}
+static void set_skip_buddy(struct sched_entity *se)
+{
+ if (likely(task_of(se)->policy != SCHED_IDLE)) {
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->skip = se;
+ }
+}
+
/*
* Preempt the current task with a newly woken task if needed:
*/
@@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (test_tsk_need_resched(curr))
return;
+ /* Idle tasks are by definition preempted by non-idle tasks. */
+ if (unlikely(curr->policy == SCHED_IDLE) &&
+ likely(p->policy != SCHED_IDLE))
+ goto preempt;
+
/*
- * Batch and idle tasks do not preempt (their preemption is driven by
- * the tick):
+ * Batch and idle tasks do not preempt non-idle tasks (their preemption
+ * is driven by the tick):
*/
if (unlikely(p->policy != SCHED_NORMAL))
return;
- /* Idle tasks are by definition preempted by everybody. */
- if (unlikely(curr->policy == SCHED_IDLE))
- goto preempt;
if (!sched_feat(WAKEUP_PREEMPT))
return;
@@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
}
}
+/*
+ * sched_yield() is very simple
+ *
+ * The magic of dealing with the ->skip buddy is in pick_next_entity.
+ */
+static void yield_task_fair(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ struct sched_entity *se = &curr->se;
+
+ /*
+ * Are we the only task in the tree?
+ */
+ if (unlikely(rq->nr_running == 1))
+ return;
+
+ clear_buddies(cfs_rq, se);
+
+ if (curr->policy != SCHED_BATCH) {
+ update_rq_clock(rq);
+ /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ }
+
+ set_skip_buddy(se);
+}
+
+static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
+{
+ struct sched_entity *se = &p->se;
+
+ if (!se->on_rq)
+ return false;
+
+ /* Tell the scheduler that we'd really like pse to run next. */
+ set_next_buddy(se);
+
+ yield_task_fair(rq);
+
+ return true;
+}
+
#ifdef CONFIG_SMP
/**************************************************
* Fair scheduling class load-balancing methods:
@@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
* We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
* @load_idx: Load index of sched_domain of this_cpu for load calc.
- * @sd_idle: Idle status of the sched_domain containing group.
* @local_group: Does group contain this_cpu.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
@@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
*/
static inline void update_sg_lb_stats(struct sched_domain *sd,
struct sched_group *group, int this_cpu,
- enum cpu_idle_type idle, int load_idx, int *sd_idle,
+ enum cpu_idle_type idle, int load_idx,
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
{
@@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
for_each_cpu_and(i, sched_group_cpus(group), cpus) {
struct rq *rq = cpu_rq(i);
- if (*sd_idle && rq->nr_running)
- *sd_idle = 0;
-
/* Bias balancing toward cpus of our domain */
if (local_group) {
if (idle_cpu(i) && !first_idle_cpu) {
@@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
/*
* Consider the group unbalanced when the imbalance is larger
- * than the average weight of two tasks.
+ * than the average weight of a task.
*
* APZ: with cgroup the avg task weight can vary wildly and
* might not be a suitable number - should we keep a
@@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+ if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
sgs->group_imb = 1;
sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
@@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
* @sd: sched_domain whose statistics are to be updated.
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing sg.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
* @sds: variable to hold the statistics for this sched_domain.
*/
static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
- enum cpu_idle_type idle, int *sd_idle,
- const struct cpumask *cpus, int *balance,
- struct sd_lb_stats *sds)
+ enum cpu_idle_type idle, const struct cpumask *cpus,
+ int *balance, struct sd_lb_stats *sds)
{
struct sched_domain *child = sd->child;
struct sched_group *sg = sd->groups;
@@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
+ update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
local_group, cpus, balance, &sgs);
if (local_group && !(*balance))
@@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
* @imbalance: Variable which stores amount of weighted load which should
* be moved to restore balance/put a group to idle.
* @idle: The idle status of this_cpu.
- * @sd_idle: The idleness of sd
* @cpus: The set of CPUs under consideration for load-balancing.
* @balance: Pointer to a variable indicating if this_cpu
* is the appropriate cpu to perform load balancing at this_level.
@@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long *imbalance, enum cpu_idle_type idle,
- int *sd_idle, const struct cpumask *cpus, int *balance)
+ const struct cpumask *cpus, int *balance)
{
struct sd_lb_stats sds;
@@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* Compute the various statistics relavent for load balancing at
* this level.
*/
- update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
- balance, &sds);
-
- /* Cases where imbalance does not exist from POV of this_cpu */
- /* 1) this_cpu is not the appropriate cpu to perform load balancing
- * at this level.
- * 2) There is no busy sibling group to pull from.
- * 3) This group is the busiest group.
- * 4) This group is more busy than the avg busieness at this
- * sched_domain.
- * 5) The imbalance is within the specified limit.
- *
- * Note: when doing newidle balance, if the local group has excess
- * capacity (i.e. nr_running < group_capacity) and the busiest group
- * does not have any capacity, we force a load balance to pull tasks
- * to the local group. In this case, we skip past checks 3, 4 and 5.
+ update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
+
+ /*
+ * this_cpu is not the appropriate cpu to perform load balancing at
+ * this level.
*/
if (!(*balance))
goto ret;
@@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
check_asym_packing(sd, &sds, this_cpu, imbalance))
return sds.busiest;
+ /* There is no busy sibling group to pull tasks from */
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+ /*
+ * If the busiest group is imbalanced the below checks don't
+ * work because they assumes all things are equal, which typically
+ * isn't true due to cpus_allowed constraints and the like.
+ */
+ if (sds.group_imb)
+ goto force_balance;
+
+ /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
!sds.busiest_has_capacity)
goto force_balance;
+ /*
+ * If the local group is more busy than the selected busiest group
+ * don't try and pull any tasks.
+ */
if (sds.this_load >= sds.max_load)
goto out_balanced;
+ /*
+ * Don't pull any tasks if this group is already above the domain
+ * average load.
+ */
sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
-
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- /*
- * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
- * And to check for busy balance use !idle_cpu instead of
- * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
- * even when they are idle.
- */
- if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
- } else {
+ if (idle == CPU_IDLE) {
/*
* This cpu is idle. If the busiest group load doesn't
* have more tasks than the number of available cpu's and
* there is no imbalance between this and busiest group
* wrt to idle cpu's, it is balanced.
*/
- if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+ if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
sds.busiest_nr_running <= sds.busiest_group_weight)
goto out_balanced;
+ } else {
+ /*
+ * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
+ * imbalance_pct to be conservative.
+ */
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
}
force_balance:
@@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+static int need_active_balance(struct sched_domain *sd, int idle,
int busiest_cpu, int this_cpu)
{
if (idle == CPU_NEWLY_IDLE) {
@@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
* move_tasks() will succeed. ld_moved will be true and this
* active balance code will not be triggered.
*/
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- return 0;
-
if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
return 0;
}
@@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
int *balance)
{
- int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+ int ld_moved, all_pinned = 0, active_balance = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
@@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
cpumask_copy(cpus, cpu_active_mask);
- /*
- * When power savings policy is enabled for the parent domain, idle
- * sibling can pick up load irrespective of busy siblings. In this case,
- * let the state of idle sibling percolate up as CPU_IDLE, instead of
- * portraying it as CPU_NOT_IDLE.
- */
- if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- sd_idle = 1;
-
schedstat_inc(sd, lb_count[idle]);
redo:
- group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle,
cpus, balance);
if (*balance == 0)
@@ -3330,8 +3372,7 @@ redo:
if (idle != CPU_NEWLY_IDLE)
sd->nr_balance_failed++;
- if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
- this_cpu)) {
+ if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
raw_spin_lock_irqsave(&busiest->lock, flags);
/* don't kick the active_load_balance_cpu_stop,
@@ -3386,10 +3427,6 @@ redo:
sd->balance_interval *= 2;
}
- if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- ld_moved = -1;
-
goto out;
out_balanced:
@@ -3403,11 +3440,7 @@ out_one_pinned:
(sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2;
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- ld_moved = -1;
- else
- ld_moved = 0;
+ ld_moved = 0;
out:
return ld_moved;
}
@@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
if (load_balance(cpu, rq, sd, idle, &balance)) {
/*
* We've pulled tasks over so either we're no
- * longer idle, or one of our SMT siblings is
- * not idle.
+ * longer idle.
*/
idle = CPU_NOT_IDLE;
}
@@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p)
* Priority of the task has changed. Check to see if we preempt
* the current task.
*/
-static void prio_changed_fair(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
* this runqueue and our priority is higher than the current's
*/
- if (running) {
+ if (rq->curr == p) {
if (p->prio > oldprio)
resched_task(rq->curr);
} else
check_preempt_curr(rq, p, 0);
}
+static void switched_from_fair(struct rq *rq, struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ /*
+ * Ensure the task's vruntime is normalized, so that when its
+ * switched back to the fair class the enqueue_entity(.flags=0) will
+ * do the right thing.
+ *
+ * If it was on_rq, then the dequeue_entity(.flags=0) will already
+ * have normalized the vruntime, if it was !on_rq, then only when
+ * the task is sleeping will it still have non-normalized vruntime.
+ */
+ if (!se->on_rq && p->state != TASK_RUNNING) {
+ /*
+ * Fix up our vruntime so that the current sleep doesn't
+ * cause 'unlimited' sleep bonus.
+ */
+ place_entity(cfs_rq, se, 0);
+ se->vruntime -= cfs_rq->min_vruntime;
+ }
+}
+
/*
* We switched to the sched_fair class.
*/
-static void switched_to_fair(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* We were most likely switched from sched_rt, so
* kick off the schedule if running, otherwise just see
* if we can still preempt the current task.
*/
- if (running)
+ if (rq->curr == p)
resched_task(rq->curr);
else
check_preempt_curr(rq, p, 0);
@@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = {
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
+ .yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
@@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = {
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
+ .switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87..c82f26c1b7c 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
{
}
-static void switched_to_idle(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_idle(struct rq *rq, struct task_struct *p)
{
- /* Can this actually happen?? */
- if (running)
- resched_task(rq->curr);
- else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
-static void prio_changed_idle(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
{
- /* This can happen for hot plug CPUS */
-
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c8..db308cb08b7 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
- int this_cpu = smp_processor_id();
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
struct sched_rt_entity *rt_se;
- rt_se = rt_rq->tg->rt_se[this_cpu];
+ int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+ rt_se = rt_rq->tg->rt_se[cpu];
if (rt_rq->rt_nr_running) {
if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- int this_cpu = smp_processor_id();
struct sched_rt_entity *rt_se;
+ int cpu = cpu_of(rq_of_rt_rq(rt_rq));
- rt_se = rt_rq->tg->rt_se[this_cpu];
+ rt_se = rt_rq->tg->rt_se[cpu];
if (rt_se && on_rt_rq(rt_se))
dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
- } else if (rt_rq->rt_nr_running)
+ } else if (rt_rq->rt_nr_running) {
idle = 0;
+ if (!rt_rq_throttled(rt_rq))
+ enqueue = 1;
+ }
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
@@ -1595,8 +1599,7 @@ static void rq_offline_rt(struct rq *rq)
* When switch from the rt queue, we bring ourselves to a position
* that we might want to pull RT tasks from other runqueues.
*/
-static void switched_from_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
/*
* If there are other RT tasks then we will reschedule
@@ -1605,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!rq->rt.rt_nr_running)
+ if (p->se.on_rq && !rq->rt.rt_nr_running)
pull_rt_task(rq);
}
@@ -1624,8 +1627,7 @@ static inline void init_sched_rt_class(void)
* with RT tasks. In this case we try to push them off to
* other runqueues.
*/
-static void switched_to_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
@@ -1636,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* If that current running task is also an RT task
* then see if we can move to another run queue.
*/
- if (!running) {
+ if (p->se.on_rq && rq->curr != p) {
#ifdef CONFIG_SMP
if (rq->rt.overloaded && push_rt_task(rq) &&
/* Don't resched if we changed runqueues */
@@ -1652,10 +1654,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* Priority of the task has changed. This may cause
* us to initiate a push or pull.
*/
-static void prio_changed_rt(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
- if (running) {
+ if (!p->se.on_rq)
+ return;
+
+ if (rq->curr == p) {
#ifdef CONFIG_SMP
/*
* If our priority decreases while running, we
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c..84ec9bcf82d 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
{
}
-static void switched_to_stop(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_stop(struct rq *rq, struct task_struct *p)
{
BUG(); /* its impossible to change to this class */
}
-static void prio_changed_stop(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
{
BUG(); /* how!?, what priority? */
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec38..56e5dec837f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -311,9 +311,21 @@ void irq_enter(void)
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
-# define invoke_softirq() __do_softirq()
+static inline void invoke_softirq(void)
+{
+ if (!force_irqthreads)
+ __do_softirq();
+ else
+ wakeup_softirqd();
+}
#else
-# define invoke_softirq() do_softirq()
+static inline void invoke_softirq(void)
+{
+ if (!force_irqthreads)
+ do_softirq();
+ else
+ wakeup_softirqd();
+}
#endif
/*
@@ -721,7 +733,6 @@ static int run_ksoftirqd(void * __bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
- current->flags |= PF_KSOFTIRQD;
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
@@ -738,7 +749,10 @@ static int run_ksoftirqd(void * __bind_cpu)
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
- do_softirq();
+ local_irq_disable();
+ if (local_softirq_pending())
+ __do_softirq();
+ local_irq_enable();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c..25cc41cd8f3 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
/* fanotify! */
cond_syscall(sys_fanotify_init);
cond_syscall(sys_fanotify_mark);
+
+/* open by handle */
+cond_syscall(sys_name_to_handle_at);
+cond_syscall(sys_open_by_handle_at);
+cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index daef911cbad..51054fea5d9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
static struct ctl_table root_table[];
static struct ctl_table_root sysctl_table_root;
static struct ctl_table_header root_table_header = {
- .count = 1,
+ {{.count = 1,
.ctl_table = root_table,
- .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+ .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
.root = &sysctl_table_root,
.set = &sysctl_table_root.default_set,
};
@@ -361,20 +361,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rt_handler,
},
- {
- .procname = "sched_compat_yield",
- .data = &sysctl_sched_compat_yield,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
.data = &sysctl_sched_autogroup_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
@@ -1567,11 +1560,16 @@ void sysctl_head_get(struct ctl_table_header *head)
spin_unlock(&sysctl_lock);
}
+static void free_head(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct ctl_table_header, rcu));
+}
+
void sysctl_head_put(struct ctl_table_header *head)
{
spin_lock(&sysctl_lock);
if (!--head->count)
- kfree(head);
+ call_rcu(&head->rcu, free_head);
spin_unlock(&sysctl_lock);
}
@@ -1948,10 +1946,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
start_unregistering(header);
if (!--header->parent->count) {
WARN_ON(1);
- kfree(header->parent);
+ call_rcu(&header->parent->rcu, free_head);
}
if (!--header->count)
- kfree(header);
+ call_rcu(&header->rcu, free_head);
spin_unlock(&sysctl_lock);
}
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9..3b8e028b960 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
{
const struct bin_table *table = NULL;
- struct nameidata nd;
struct vfsmount *mnt;
struct file *file;
ssize_t result;
char *pathname;
int flags;
- int acc_mode;
pathname = sysctl_getname(name, nlen, &table);
result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
/* How should the sysctl be accessed? */
if (oldval && oldlen && newval && newlen) {
flags = O_RDWR;
- acc_mode = MAY_READ | MAY_WRITE;
} else if (newval && newlen) {
flags = O_WRONLY;
- acc_mode = MAY_WRITE;
} else if (oldval && oldlen) {
flags = O_RDONLY;
- acc_mode = MAY_READ;
} else {
result = 0;
goto out_putname;
}
mnt = current->nsproxy->pid_ns->proc_mnt;
- result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
- if (result)
- goto out_putname;
-
- result = may_open(&nd.path, acc_mode, flags);
- if (result)
- goto out_putpath;
-
- file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
+ file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
result = PTR_ERR(file);
if (IS_ERR(file))
goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
putname(pathname);
out:
return result;
-
-out_putpath:
- path_put(&nd.path);
- goto out_putname;
}
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576..8e8dc6d705c 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -150,7 +150,7 @@ static inline void warp_clock(void)
* various programs will get confused when the clock gets warped.
*/
-int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
+int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
{
static int firsttime = 1;
int error = 0;
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
}
/**
- * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
* @n: nsecs in u64
*
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
-unsigned long nsecs_to_jiffies(u64 n)
+u64 nsecs_to_jiffies64(u64 n)
{
#if (NSEC_PER_SEC % HZ) == 0
/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,22 +674,23 @@ unsigned long nsecs_to_jiffies(u64 n)
#endif
}
-#if (BITS_PER_LONG < 64)
-u64 get_jiffies_64(void)
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n: nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
{
- unsigned long seq;
- u64 ret;
-
- do {
- seq = read_seqbegin(&xtime_lock);
- ret = jiffies_64;
- } while (read_seqretry(&xtime_lock, seq));
- return ret;
+ return (unsigned long)nsecs_to_jiffies64(n);
}
-EXPORT_SYMBOL(get_jiffies_64);
-#endif
-
-EXPORT_SYMBOL(jiffies);
/*
* Add two timespec values and do a safety check for overflow.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06..b0425991e9a 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,5 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timeconv.o posix-clock.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d7395fdfb9f..0d74b9ba90c 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,7 +18,6 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 5404a845690..b2fa506667c 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -22,8 +22,11 @@
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
+#include <linux/module.h>
#include <linux/init.h>
+#include "tick-internal.h"
+
/* The Jiffies based clocksource is the lowest common
* denominator clock source which should function on
* all systems. It has the same coarse resolution as
@@ -64,6 +67,23 @@ struct clocksource clocksource_jiffies = {
.shift = JIFFIES_SHIFT,
};
+#if (BITS_PER_LONG < 64)
+u64 get_jiffies_64(void)
+{
+ unsigned long seq;
+ u64 ret;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ ret = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+ return ret;
+}
+EXPORT_SYMBOL(get_jiffies_64);
+#endif
+
+EXPORT_SYMBOL(jiffies);
+
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5c00242fa92..5f1bb8e2008 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,6 +16,8 @@
#include <linux/mm.h>
#include <linux/module.h>
+#include "tick-internal.h"
+
/*
* NTP timekeeping variables:
*/
@@ -646,6 +648,17 @@ int do_adjtimex(struct timex *txc)
hrtimer_cancel(&leap_timer);
}
+ if (txc->modes & ADJ_SETOFFSET) {
+ struct timespec delta;
+ delta.tv_sec = txc->time.tv_sec;
+ delta.tv_nsec = txc->time.tv_usec;
+ if (!(txc->modes & ADJ_NANO))
+ delta.tv_nsec *= 1000;
+ result = timekeeping_inject_offset(&delta);
+ if (result)
+ return result;
+ }
+
getnstimeofday(&ts);
write_seqlock_irq(&xtime_lock);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
new file mode 100644
index 00000000000..25028dd4fa1
--- /dev/null
+++ b/kernel/time/posix-clock.c
@@ -0,0 +1,451 @@
+/*
+ * posix-clock.c - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/posix-clock.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+static void delete_clock(struct kref *kref);
+
+/*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+static struct posix_clock *get_posix_clock(struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+
+ mutex_lock(&clk->mutex);
+
+ if (!clk->zombie)
+ return clk;
+
+ mutex_unlock(&clk->mutex);
+
+ return NULL;
+}
+
+static void put_posix_clock(struct posix_clock *clk)
+{
+ mutex_unlock(&clk->mutex);
+}
+
+static ssize_t posix_clock_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -EINVAL;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.read)
+ err = clk->ops.read(clk, fp->f_flags, buf, count);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int result = 0;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.poll)
+ result = clk->ops.poll(clk, fp, wait);
+
+ put_posix_clock(clk);
+
+ return result;
+}
+
+static int posix_clock_fasync(int fd, struct file *fp, int on)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = 0;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.fasync)
+ err = clk->ops.fasync(clk, fd, fp, on);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENODEV;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.mmap)
+ err = clk->ops.mmap(clk, vma);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static long posix_clock_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long posix_clock_compat_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+#endif
+
+static int posix_clock_open(struct inode *inode, struct file *fp)
+{
+ int err;
+ struct posix_clock *clk =
+ container_of(inode->i_cdev, struct posix_clock, cdev);
+
+ mutex_lock(&clk->mutex);
+
+ if (clk->zombie) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (clk->ops.open)
+ err = clk->ops.open(clk, fp->f_mode);
+ else
+ err = 0;
+
+ if (!err) {
+ kref_get(&clk->kref);
+ fp->private_data = clk;
+ }
+out:
+ mutex_unlock(&clk->mutex);
+ return err;
+}
+
+static int posix_clock_release(struct inode *inode, struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+ int err = 0;
+
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+ kref_put(&clk->kref, delete_clock);
+
+ fp->private_data = NULL;
+
+ return err;
+}
+
+static const struct file_operations posix_clock_file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = posix_clock_read,
+ .poll = posix_clock_poll,
+ .unlocked_ioctl = posix_clock_ioctl,
+ .open = posix_clock_open,
+ .release = posix_clock_release,
+ .fasync = posix_clock_fasync,
+ .mmap = posix_clock_mmap,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = posix_clock_compat_ioctl,
+#endif
+};
+
+int posix_clock_register(struct posix_clock *clk, dev_t devid)
+{
+ int err;
+
+ kref_init(&clk->kref);
+ mutex_init(&clk->mutex);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
+ clk->cdev.owner = clk->ops.owner;
+ err = cdev_add(&clk->cdev, devid, 1);
+ if (err)
+ goto no_cdev;
+
+ return err;
+no_cdev:
+ mutex_destroy(&clk->mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(posix_clock_register);
+
+static void delete_clock(struct kref *kref)
+{
+ struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+ mutex_destroy(&clk->mutex);
+ if (clk->release)
+ clk->release(clk);
+}
+
+void posix_clock_unregister(struct posix_clock *clk)
+{
+ cdev_del(&clk->cdev);
+
+ mutex_lock(&clk->mutex);
+ clk->zombie = true;
+ mutex_unlock(&clk->mutex);
+
+ kref_put(&clk->kref, delete_clock);
+}
+EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
+struct posix_clock_desc {
+ struct file *fp;
+ struct posix_clock *clk;
+};
+
+static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
+{
+ struct file *fp = fget(CLOCKID_TO_FD(id));
+ int err = -EINVAL;
+
+ if (!fp)
+ return err;
+
+ if (fp->f_op->open != posix_clock_open || !fp->private_data)
+ goto out;
+
+ cd->fp = fp;
+ cd->clk = get_posix_clock(fp);
+
+ err = cd->clk ? 0 : -ENODEV;
+out:
+ if (err)
+ fput(fp);
+ return err;
+}
+
+static void put_clock_desc(struct posix_clock_desc *cd)
+{
+ put_posix_clock(cd->clk);
+ fput(cd->fp);
+}
+
+static int pc_clock_adjtime(clockid_t id, struct timex *tx)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (cd.clk->ops.clock_adjtime)
+ err = cd.clk->ops.clock_adjtime(cd.clk, tx);
+ else
+ err = -EOPNOTSUPP;
+out:
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_gettime)
+ err = cd.clk->ops.clock_gettime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_getres(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_getres)
+ err = cd.clk->ops.clock_getres(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_settime(clockid_t id, const struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (cd.clk->ops.clock_settime)
+ err = cd.clk->ops.clock_settime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+out:
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_create(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_create)
+ err = cd.clk->ops.timer_create(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_delete(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_delete)
+ err = cd.clk->ops.timer_delete(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+
+ if (get_clock_desc(id, &cd))
+ return;
+
+ if (cd.clk->ops.timer_gettime)
+ cd.clk->ops.timer_gettime(cd.clk, kit, ts);
+
+ put_clock_desc(&cd);
+}
+
+static int pc_timer_settime(struct k_itimer *kit, int flags,
+ struct itimerspec *ts, struct itimerspec *old)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_settime)
+ err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+struct k_clock clock_posix_dynamic = {
+ .clock_getres = pc_clock_getres,
+ .clock_set = pc_clock_settime,
+ .clock_get = pc_clock_gettime,
+ .clock_adj = pc_clock_adjtime,
+ .timer_create = pc_timer_create,
+ .timer_set = pc_timer_settime,
+ .timer_del = pc_timer_delete,
+ .timer_get = pc_timer_gettime,
+};
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index a3b5aff6260..da800ffa810 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ed228ef6f6b..119528de823 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <asm/irq_regs.h>
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f65d3a723a6..1009b06d6f8 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
/*
* tick internal variable and functions used by low/high res code
*/
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
#define TICK_DO_TIMER_BOOT -2
@@ -135,3 +139,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
+
+#endif
+
+extern void do_timer(unsigned long ticks);
+extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 5cbc101f908..2d04411a5f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea243347..d5097c44b40 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,7 +19,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <linux/module.h>
#include <asm/irq_regs.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d27c7562902..3bd7e3d5c63 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday);
*
* Sets the time of day to the new time and update NTP and notify hrtimers
*/
-int do_settimeofday(struct timespec *tv)
+int do_settimeofday(const struct timespec *tv)
{
struct timespec ts_delta;
unsigned long flags;
@@ -387,6 +387,42 @@ int do_settimeofday(struct timespec *tv)
EXPORT_SYMBOL(do_settimeofday);
+
+/**
+ * timekeeping_inject_offset - Adds or subtracts from the current time.
+ * @tv: pointer to the timespec variable containing the offset
+ *
+ * Adds or subtracts an offset value from the current time.
+ */
+int timekeeping_inject_offset(struct timespec *ts)
+{
+ unsigned long flags;
+
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ timekeeping_forward_now();
+
+ xtime = timespec_add(xtime, *ts);
+ wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
+
+ timekeeper.ntp_error = 0;
+ ntp_clear();
+
+ update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+ timekeeper.mult);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+
+ return 0;
+}
+EXPORT_SYMBOL(timekeeping_inject_offset);
+
/**
* change_clocksource - Swaps clocksources if a new one is available
*
@@ -779,7 +815,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
*
* Called from the timer interrupt, must hold a write on xtime_lock.
*/
-void update_wall_time(void)
+static void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
@@ -871,7 +907,7 @@ void update_wall_time(void)
* getboottime - Return the real time of system boot.
* @ts: pointer to the timespec to be set
*
- * Returns the time of day in a timespec.
+ * Returns the wall-time of boot in a timespec.
*
* This is based on the wall_to_monotonic offset and the total suspend
* time. Calls to settimeofday will affect the value returned (which
@@ -889,6 +925,55 @@ void getboottime(struct timespec *ts)
}
EXPORT_SYMBOL_GPL(getboottime);
+
+/**
+ * get_monotonic_boottime - Returns monotonic time since boot
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the monotonic time since boot in a timespec.
+ *
+ * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also
+ * includes the time spent in suspend.
+ */
+void get_monotonic_boottime(struct timespec *ts)
+{
+ struct timespec tomono, sleep;
+ unsigned int seq;
+ s64 nsecs;
+
+ WARN_ON(timekeeping_suspended);
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ *ts = xtime;
+ tomono = wall_to_monotonic;
+ sleep = total_sleep_time;
+ nsecs = timekeeping_get_ns();
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
+ ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
+}
+EXPORT_SYMBOL_GPL(get_monotonic_boottime);
+
+/**
+ * ktime_get_boottime - Returns monotonic time since boot in a ktime
+ *
+ * Returns the monotonic time since boot in a ktime
+ *
+ * This is similar to CLOCK_MONTONIC/ktime_get, but also
+ * includes the time spent in suspend.
+ */
+ktime_t ktime_get_boottime(void)
+{
+ struct timespec ts;
+
+ get_monotonic_boottime(&ts);
+ return timespec_to_ktime(ts);
+}
+EXPORT_SYMBOL_GPL(ktime_get_boottime);
+
/**
* monotonic_to_bootbased - Convert the monotonic time to boot based.
* @ts: pointer to the timespec to be converted
@@ -910,11 +995,6 @@ struct timespec __current_kernel_time(void)
return xtime;
}
-struct timespec __get_wall_to_monotonic(void)
-{
- return wall_to_monotonic;
-}
-
struct timespec current_kernel_time(void)
{
struct timespec now;
@@ -946,3 +1026,48 @@ struct timespec get_monotonic_coarse(void)
now.tv_nsec + mono.tv_nsec);
return now;
}
+
+/*
+ * The 64-bit jiffies value is not atomic - you MUST NOT read it
+ * without sampling the sequence number in xtime_lock.
+ * jiffies is defined in the linker script...
+ */
+void do_timer(unsigned long ticks)
+{
+ jiffies_64 += ticks;
+ update_wall_time();
+ calc_global_load(ticks);
+}
+
+/**
+ * get_xtime_and_monotonic_and_sleep_offset() - get xtime, wall_to_monotonic,
+ * and sleep offsets.
+ * @xtim: pointer to timespec to be set with xtime
+ * @wtom: pointer to timespec to be set with wall_to_monotonic
+ * @sleep: pointer to timespec to be set with time in suspend
+ */
+void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
+ struct timespec *wtom, struct timespec *sleep)
+{
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ *xtim = xtime;
+ *wtom = wall_to_monotonic;
+ *sleep = total_sleep_time;
+ } while (read_seqretry(&xtime_lock, seq));
+}
+
+/**
+ * xtime_update() - advances the timekeeping infrastructure
+ * @ticks: number of ticks, that have elapsed since the last call.
+ *
+ * Must be called with interrupts disabled.
+ */
+void xtime_update(unsigned long ticks)
+{
+ write_seqlock(&xtime_lock);
+ do_timer(ticks);
+ write_sequnlock(&xtime_lock);
+}
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d24..fd6198692b5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
static struct debug_obj_descr timer_debug_descr;
+static void *timer_debug_hint(void *addr)
+{
+ return ((struct timer_list *) addr)->function;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr timer_debug_descr = {
.name = "timer_list",
+ .debug_hint = timer_debug_hint,
.fixup_init = timer_fixup_init,
.fixup_activate = timer_fixup_activate,
.fixup_free = timer_fixup_free,
@@ -964,6 +970,25 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
* add_timer_on(). Upon exit the timer is not queued and the handler is
* not running on any CPU.
*
+ * Note: You must not hold locks that are held in interrupt context
+ * while calling this function. Even if the lock has nothing to do
+ * with the timer in question. Here's why:
+ *
+ * CPU0 CPU1
+ * ---- ----
+ * <SOFTIRQ>
+ * call_timer_fn();
+ * base->running_timer = mytimer;
+ * spin_lock_irq(somelock);
+ * <IRQ>
+ * spin_lock(somelock);
+ * del_timer_sync(mytimer);
+ * while (base->running_timer == mytimer);
+ *
+ * Now del_timer_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but
+ * it has interrupted the softirq that CPU0 is waiting to finish.
+ *
* The function returns whether it has deactivated a pending timer or not.
*/
int del_timer_sync(struct timer_list *timer)
@@ -971,6 +996,10 @@ int del_timer_sync(struct timer_list *timer)
#ifdef CONFIG_LOCKDEP
unsigned long flags;
+ /*
+ * If lockdep gives a backtrace here, please reference
+ * the synchronization rules above.
+ */
local_irq_save(flags);
lock_map_acquire(&timer->lockdep_map);
lock_map_release(&timer->lockdep_map);
@@ -1295,19 +1324,6 @@ void run_local_timers(void)
raise_softirq(TIMER_SOFTIRQ);
}
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
- */
-
-void do_timer(unsigned long ticks)
-{
- jiffies_64 += ticks;
- update_wall_time();
- calc_global_load(ticks);
-}
-
#ifdef __ARCH_WANT_SYS_ALARM
/*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee6578b578a..b5fe4c00eb3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -316,6 +316,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
static struct debug_obj_descr work_debug_descr;
+static void *work_debug_hint(void *addr)
+{
+ return ((struct work_struct *) addr)->func;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -387,6 +392,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
+ .debug_hint = work_debug_hint,
.fixup_init = work_fixup_init,
.fixup_activate = work_fixup_activate,
.fixup_free = work_fixup_free,
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e..9d86e45086f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
static void debug_print_object(struct debug_obj *obj, char *msg)
{
+ struct debug_obj_descr *descr = obj->descr;
static int limit;
- if (limit < 5 && obj->descr != descr_test) {
+ if (limit < 5 && descr != descr_test) {
+ void *hint = descr->debug_hint ?
+ descr->debug_hint(obj->object) : NULL;
limit++;
WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
- "object type: %s\n",
+ "object type: %s hint: %pS\n",
msg, obj_states[obj->state], obj->astate,
- obj->descr->name);
+ descr->name, hint);
}
debug_objects_warnings++;
}
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d919..0ae7e643172 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
#ifdef CONFIG_DEBUG_PI_LIST
+static struct plist_head test_head;
+
static void plist_check_prev_next(struct list_head *t, struct list_head *p,
struct list_head *n)
{
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
static void plist_check_head(struct plist_head *head)
{
- WARN_ON(!head->rawlock && !head->spinlock);
+ WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
if (head->rawlock)
WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
if (head->spinlock)
WARN_ON_SMP(!spin_is_locked(head->spinlock));
- plist_check_list(&head->prio_list);
+ if (!plist_head_empty(head))
+ plist_check_list(&plist_first(head)->prio_list);
plist_check_list(&head->node_list);
}
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
*/
void plist_add(struct plist_node *node, struct plist_head *head)
{
- struct plist_node *iter;
+ struct plist_node *first, *iter, *prev = NULL;
+ struct list_head *node_next = &head->node_list;
plist_check_head(head);
WARN_ON(!plist_node_empty(node));
+ WARN_ON(!list_empty(&node->prio_list));
+
+ if (plist_head_empty(head))
+ goto ins_node;
- list_for_each_entry(iter, &head->prio_list, plist.prio_list) {
- if (node->prio < iter->prio)
- goto lt_prio;
- else if (node->prio == iter->prio) {
- iter = list_entry(iter->plist.prio_list.next,
- struct plist_node, plist.prio_list);
- goto eq_prio;
+ first = iter = plist_first(head);
+
+ do {
+ if (node->prio < iter->prio) {
+ node_next = &iter->node_list;
+ break;
}
- }
-lt_prio:
- list_add_tail(&node->plist.prio_list, &iter->plist.prio_list);
-eq_prio:
- list_add_tail(&node->plist.node_list, &iter->plist.node_list);
+ prev = iter;
+ iter = list_entry(iter->prio_list.next,
+ struct plist_node, prio_list);
+ } while (iter != first);
+
+ if (!prev || prev->prio != node->prio)
+ list_add_tail(&node->prio_list, &iter->prio_list);
+ins_node:
+ list_add_tail(&node->node_list, node_next);
plist_check_head(head);
}
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
{
plist_check_head(head);
- if (!list_empty(&node->plist.prio_list)) {
- struct plist_node *next = plist_first(&node->plist);
+ if (!list_empty(&node->prio_list)) {
+ if (node->node_list.next != &head->node_list) {
+ struct plist_node *next;
+
+ next = list_entry(node->node_list.next,
+ struct plist_node, node_list);
- list_move_tail(&next->plist.prio_list, &node->plist.prio_list);
- list_del_init(&node->plist.prio_list);
+ /* add the next plist_node into prio_list */
+ if (list_empty(&next->prio_list))
+ list_add(&next->prio_list, &node->prio_list);
+ }
+ list_del_init(&node->prio_list);
}
- list_del_init(&node->plist.node_list);
+ list_del_init(&node->node_list);
plist_check_head(head);
}
+
+#ifdef CONFIG_DEBUG_PI_LIST
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+static struct plist_node __initdata test_node[241];
+
+static void __init plist_test_check(int nr_expect)
+{
+ struct plist_node *first, *prio_pos, *node_pos;
+
+ if (plist_head_empty(&test_head)) {
+ BUG_ON(nr_expect != 0);
+ return;
+ }
+
+ prio_pos = first = plist_first(&test_head);
+ plist_for_each(node_pos, &test_head) {
+ if (nr_expect-- < 0)
+ break;
+ if (node_pos == first)
+ continue;
+ if (node_pos->prio == prio_pos->prio) {
+ BUG_ON(!list_empty(&node_pos->prio_list));
+ continue;
+ }
+
+ BUG_ON(prio_pos->prio > node_pos->prio);
+ BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
+ prio_pos = node_pos;
+ }
+
+ BUG_ON(nr_expect != 0);
+ BUG_ON(prio_pos->prio_list.next != &first->prio_list);
+}
+
+static int __init plist_test(void)
+{
+ int nr_expect = 0, i, loop;
+ unsigned int r = local_clock();
+
+ printk(KERN_INFO "start plist test\n");
+ plist_head_init(&test_head, NULL);
+ for (i = 0; i < ARRAY_SIZE(test_node); i++)
+ plist_node_init(test_node + i, 0);
+
+ for (loop = 0; loop < 1000; loop++) {
+ r = r * 193939 % 47629;
+ i = r % ARRAY_SIZE(test_node);
+ if (plist_node_empty(test_node + i)) {
+ r = r * 193939 % 47629;
+ test_node[i].prio = r % 99;
+ plist_add(test_node + i, &test_head);
+ nr_expect++;
+ } else {
+ plist_del(test_node + i, &test_head);
+ nr_expect--;
+ }
+ plist_test_check(nr_expect);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_node); i++) {
+ if (plist_node_empty(test_node + i))
+ continue;
+ plist_del(test_node + i, &test_head);
+ nr_expect--;
+ plist_test_check(nr_expect);
+ }
+
+ printk(KERN_INFO "end plist test\n");
+ return 0;
+}
+
+module_init(plist_test);
+
+#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf..aa7c3052261 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/*
* wait for the read lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_read_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
-RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
/*
* wait for the write lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_write_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
-RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
-asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
diff --git a/mm/Makefile b/mm/Makefile
index 2b1b575ae71..42a8326c3e3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -7,7 +7,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
-obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
+obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
@@ -15,6 +15,12 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
$(mmu-y)
obj-y += init-mm.o
+ifdef CONFIG_NO_BOOTMEM
+ obj-y += nobootmem.o
+else
+ obj-y += bootmem.o
+endif
+
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 13b0caa9793..07aeb89e396 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -23,6 +23,13 @@
#include "internal.h"
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data __refdata contig_page_data = {
+ .bdata = &bootmem_node_data[0]
+};
+EXPORT_SYMBOL(contig_page_data);
+#endif
+
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
@@ -35,7 +42,6 @@ unsigned long max_pfn;
unsigned long saved_max_pfn;
#endif
-#ifndef CONFIG_NO_BOOTMEM
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -146,7 +152,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
min_low_pfn = start;
return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
}
-#endif
+
/*
* free_bootmem_late - free bootmem pages directly to page allocator
* @addr: starting address of the range
@@ -171,53 +177,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
}
}
-#ifdef CONFIG_NO_BOOTMEM
-static void __init __free_pages_memory(unsigned long start, unsigned long end)
-{
- int i;
- unsigned long start_aligned, end_aligned;
- int order = ilog2(BITS_PER_LONG);
-
- start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
- end_aligned = end & ~(BITS_PER_LONG - 1);
-
- if (end_aligned <= start_aligned) {
- for (i = start; i < end; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-
- return;
- }
-
- for (i = start; i < start_aligned; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-
- for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
- __free_pages_bootmem(pfn_to_page(i), order);
-
- for (i = end_aligned; i < end; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-}
-
-unsigned long __init free_all_memory_core_early(int nodeid)
-{
- int i;
- u64 start, end;
- unsigned long count = 0;
- struct range *range = NULL;
- int nr_range;
-
- nr_range = get_free_all_memory_range(&range, nodeid);
-
- for (i = 0; i < nr_range; i++) {
- start = range[i].start;
- end = range[i].end;
- count += end - start;
- __free_pages_memory(start, end);
- }
-
- return count;
-}
-#else
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
int aligned;
@@ -278,7 +237,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
return count;
}
-#endif
/**
* free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -289,12 +247,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
register_page_bootmem_info_node(pgdat);
-#ifdef CONFIG_NO_BOOTMEM
- /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
- return 0;
-#else
return free_all_bootmem_core(pgdat->bdata);
-#endif
}
/**
@@ -304,16 +257,6 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
*/
unsigned long __init free_all_bootmem(void)
{
-#ifdef CONFIG_NO_BOOTMEM
- /*
- * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
- * because in some case like Node0 doesnt have RAM installed
- * low ram will be on Node1
- * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
- * will be used instead of only Node0 related
- */
- return free_all_memory_core_early(MAX_NUMNODES);
-#else
unsigned long total_pages = 0;
bootmem_data_t *bdata;
@@ -321,10 +264,8 @@ unsigned long __init free_all_bootmem(void)
total_pages += free_all_bootmem_core(bdata);
return total_pages;
-#endif
}
-#ifndef CONFIG_NO_BOOTMEM
static void __init __free(bootmem_data_t *bdata,
unsigned long sidx, unsigned long eidx)
{
@@ -419,7 +360,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
}
BUG();
}
-#endif
/**
* free_bootmem_node - mark a page range as usable
@@ -434,10 +374,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
-#ifdef CONFIG_NO_BOOTMEM
- kmemleak_free_part(__va(physaddr), size);
- memblock_x86_free_range(physaddr, physaddr + size);
-#else
unsigned long start, end;
kmemleak_free_part(__va(physaddr), size);
@@ -446,7 +382,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
end = PFN_DOWN(physaddr + size);
mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
-#endif
}
/**
@@ -460,10 +395,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
*/
void __init free_bootmem(unsigned long addr, unsigned long size)
{
-#ifdef CONFIG_NO_BOOTMEM
- kmemleak_free_part(__va(addr), size);
- memblock_x86_free_range(addr, addr + size);
-#else
unsigned long start, end;
kmemleak_free_part(__va(addr), size);
@@ -472,7 +403,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
end = PFN_DOWN(addr + size);
mark_bootmem(start, end, 0, 0);
-#endif
}
/**
@@ -489,17 +419,12 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
-#ifdef CONFIG_NO_BOOTMEM
- panic("no bootmem");
- return 0;
-#else
unsigned long start, end;
start = PFN_DOWN(physaddr);
end = PFN_UP(physaddr + size);
return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
-#endif
}
/**
@@ -515,20 +440,14 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
-#ifdef CONFIG_NO_BOOTMEM
- panic("no bootmem");
- return 0;
-#else
unsigned long start, end;
start = PFN_DOWN(addr);
end = PFN_UP(addr + size);
return mark_bootmem(start, end, 1, flags);
-#endif
}
-#ifndef CONFIG_NO_BOOTMEM
int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
@@ -685,33 +604,12 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
#endif
return NULL;
}
-#endif
static void * __init ___alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal,
unsigned long limit)
{
-#ifdef CONFIG_NO_BOOTMEM
- void *ptr;
-
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc(size, GFP_NOWAIT);
-
-restart:
-
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
-
- if (ptr)
- return ptr;
-
- if (goal != 0) {
- goal = 0;
- goto restart;
- }
-
- return NULL;
-#else
bootmem_data_t *bdata;
void *region;
@@ -737,7 +635,6 @@ restart:
}
return NULL;
-#endif
}
/**
@@ -758,10 +655,6 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
{
unsigned long limit = 0;
-#ifdef CONFIG_NO_BOOTMEM
- limit = -1UL;
-#endif
-
return ___alloc_bootmem_nopanic(size, align, goal, limit);
}
@@ -798,14 +691,9 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
{
unsigned long limit = 0;
-#ifdef CONFIG_NO_BOOTMEM
- limit = -1UL;
-#endif
-
return ___alloc_bootmem(size, align, goal, limit);
}
-#ifndef CONFIG_NO_BOOTMEM
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
@@ -822,7 +710,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
return ___alloc_bootmem(size, align, goal, limit);
}
-#endif
/**
* __alloc_bootmem_node - allocate boot memory from a specific node
@@ -842,24 +729,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- void *ptr;
-
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- goal, -1ULL);
- if (ptr)
- return ptr;
-
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
- goal, -1ULL);
-#else
- ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
-#endif
-
- return ptr;
+ return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -880,13 +753,8 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
unsigned long new_goal;
new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- new_goal, -1ULL);
-#else
ptr = alloc_bootmem_core(pgdat->bdata, size, align,
new_goal, 0);
-#endif
if (ptr)
return ptr;
}
@@ -907,16 +775,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
void * __init alloc_bootmem_section(unsigned long size,
unsigned long section_nr)
{
-#ifdef CONFIG_NO_BOOTMEM
- unsigned long pfn, goal, limit;
-
- pfn = section_nr_to_pfn(section_nr);
- goal = pfn << PAGE_SHIFT;
- limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
-
- return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
- SMP_CACHE_BYTES, goal, limit);
-#else
bootmem_data_t *bdata;
unsigned long pfn, goal, limit;
@@ -926,7 +784,6 @@ void * __init alloc_bootmem_section(unsigned long size,
bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
-#endif
}
#endif
@@ -938,16 +795,11 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- goal, -1ULL);
-#else
ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
if (ptr)
return ptr;
ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
-#endif
if (ptr)
return ptr;
@@ -995,21 +847,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- void *ptr;
-
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ return ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
- if (ptr)
- return ptr;
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
- goal, ARCH_LOW_ADDRESS_LIMIT);
-#else
- ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
- goal, ARCH_LOW_ADDRESS_LIMIT);
-#endif
- return ptr;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dbe99a5f207..113e35c4750 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1762,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
#ifndef CONFIG_NUMA
VM_BUG_ON(!*hpage);
new_page = *hpage;
+ if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+ up_read(&mm->mmap_sem);
+ return;
+ }
#else
VM_BUG_ON(*hpage);
/*
@@ -1781,12 +1785,12 @@ static void collapse_huge_page(struct mm_struct *mm,
*hpage = ERR_PTR(-ENOMEM);
return;
}
-#endif
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
up_read(&mm->mmap_sem);
put_page(new_page);
return;
}
+#endif
/* after allocating the hugepage upgrade to mmap_sem write mode */
up_read(&mm->mmap_sem);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
new file mode 100644
index 00000000000..e2bdb07079c
--- /dev/null
+++ b/mm/nobootmem.c
@@ -0,0 +1,435 @@
+/*
+ * bootmem - A boot-time physical memory allocator and configurator
+ *
+ * Copyright (C) 1999 Ingo Molnar
+ * 1999 Kanoj Sarcar, SGI
+ * 2008 Johannes Weiner
+ *
+ * Access to this subsystem has to be serialized externally (which is true
+ * for the boot process anyway).
+ */
+#include <linux/init.h>
+#include <linux/pfn.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/kmemleak.h>
+#include <linux/range.h>
+#include <linux/memblock.h>
+
+#include <asm/bug.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "internal.h"
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data __refdata contig_page_data;
+EXPORT_SYMBOL(contig_page_data);
+#endif
+
+unsigned long max_low_pfn;
+unsigned long min_low_pfn;
+unsigned long max_pfn;
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * If we have booted due to a crash, max_pfn will be a very low value. We need
+ * to know the amount of memory that the previous kernel used.
+ */
+unsigned long saved_max_pfn;
+#endif
+
+static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+ u64 goal, u64 limit)
+{
+ void *ptr;
+ u64 addr;
+
+ if (limit > memblock.current_limit)
+ limit = memblock.current_limit;
+
+ addr = find_memory_core_early(nid, size, align, goal, limit);
+
+ if (addr == MEMBLOCK_ERROR)
+ return NULL;
+
+ ptr = phys_to_virt(addr);
+ memset(ptr, 0, size);
+ memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+ /*
+ * The min_count is set to 0 so that bootmem allocated blocks
+ * are never reported as leaks.
+ */
+ kmemleak_alloc(ptr, size, 0, 0);
+ return ptr;
+}
+
+/*
+ * free_bootmem_late - free bootmem pages directly to page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system. Pages are given directly
+ * to the page allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+ unsigned long cursor, end;
+
+ kmemleak_free_part(__va(addr), size);
+
+ cursor = PFN_UP(addr);
+ end = PFN_DOWN(addr + size);
+
+ for (; cursor < end; cursor++) {
+ __free_pages_bootmem(pfn_to_page(cursor), 0);
+ totalram_pages++;
+ }
+}
+
+static void __init __free_pages_memory(unsigned long start, unsigned long end)
+{
+ int i;
+ unsigned long start_aligned, end_aligned;
+ int order = ilog2(BITS_PER_LONG);
+
+ start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+ end_aligned = end & ~(BITS_PER_LONG - 1);
+
+ if (end_aligned <= start_aligned) {
+ for (i = start; i < end; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+
+ return;
+ }
+
+ for (i = start; i < start_aligned; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+
+ for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
+ __free_pages_bootmem(pfn_to_page(i), order);
+
+ for (i = end_aligned; i < end; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+}
+
+unsigned long __init free_all_memory_core_early(int nodeid)
+{
+ int i;
+ u64 start, end;
+ unsigned long count = 0;
+ struct range *range = NULL;
+ int nr_range;
+
+ nr_range = get_free_all_memory_range(&range, nodeid);
+
+ for (i = 0; i < nr_range; i++) {
+ start = range[i].start;
+ end = range[i].end;
+ count += end - start;
+ __free_pages_memory(start, end);
+ }
+
+ return count;
+}
+
+/**
+ * free_all_bootmem_node - release a node's free pages to the buddy allocator
+ * @pgdat: node to be released
+ *
+ * Returns the number of pages actually released.
+ */
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
+{
+ register_page_bootmem_info_node(pgdat);
+
+ /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
+ return 0;
+}
+
+/**
+ * free_all_bootmem - release free pages to the buddy allocator
+ *
+ * Returns the number of pages actually released.
+ */
+unsigned long __init free_all_bootmem(void)
+{
+ /*
+ * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+ * because in some case like Node0 doesnt have RAM installed
+ * low ram will be on Node1
+ * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
+ * will be used instead of only Node0 related
+ */
+ return free_all_memory_core_early(MAX_NUMNODES);
+}
+
+/**
+ * free_bootmem_node - mark a page range as usable
+ * @pgdat: node the range resides on
+ * @physaddr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * The range must reside completely on the specified node.
+ */
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
+{
+ kmemleak_free_part(__va(physaddr), size);
+ memblock_x86_free_range(physaddr, physaddr + size);
+}
+
+/**
+ * free_bootmem - mark a page range as usable
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * The range must be contiguous but may span node boundaries.
+ */
+void __init free_bootmem(unsigned long addr, unsigned long size)
+{
+ kmemleak_free_part(__va(addr), size);
+ memblock_x86_free_range(addr, addr + size);
+}
+
+static void * __init ___alloc_bootmem_nopanic(unsigned long size,
+ unsigned long align,
+ unsigned long goal,
+ unsigned long limit)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc(size, GFP_NOWAIT);
+
+restart:
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
+
+ if (ptr)
+ return ptr;
+
+ if (goal != 0) {
+ goal = 0;
+ goto restart;
+ }
+
+ return NULL;
+}
+
+/**
+ * __alloc_bootmem_nopanic - allocate boot memory without panicking
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * Returns NULL on failure.
+ */
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
+ unsigned long goal)
+{
+ unsigned long limit = -1UL;
+
+ return ___alloc_bootmem_nopanic(size, align, goal, limit);
+}
+
+static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
+ unsigned long goal, unsigned long limit)
+{
+ void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
+
+ if (mem)
+ return mem;
+ /*
+ * Whoops, we cannot satisfy the allocation request.
+ */
+ printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+ panic("Out of memory");
+ return NULL;
+}
+
+/**
+ * __alloc_bootmem - allocate boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem(unsigned long size, unsigned long align,
+ unsigned long goal)
+{
+ unsigned long limit = -1UL;
+
+ return ___alloc_bootmem(size, align, goal, limit);
+}
+
+/**
+ * __alloc_bootmem_node - allocate boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may fall back to any node in the system if the specified node
+ * can not hold the requested memory.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ return __alloc_memory_core_early(MAX_NUMNODES, size, align,
+ goal, -1ULL);
+}
+
+void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+#ifdef MAX_DMA32_PFN
+ unsigned long end_pfn;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ /* update goal according ...MAX_DMA32_PFN */
+ end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+ if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
+ (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
+ void *ptr;
+ unsigned long new_goal;
+
+ new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ new_goal, -1ULL);
+ if (ptr)
+ return ptr;
+ }
+#endif
+
+ return __alloc_bootmem_node(pgdat, size, align, goal);
+
+}
+
+#ifdef CONFIG_SPARSEMEM
+/**
+ * alloc_bootmem_section - allocate boot memory from a specific section
+ * @size: size of the request in bytes
+ * @section_nr: sparse map section to allocate from
+ *
+ * Return NULL on failure.
+ */
+void * __init alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr)
+{
+ unsigned long pfn, goal, limit;
+
+ pfn = section_nr_to_pfn(section_nr);
+ goal = pfn << PAGE_SHIFT;
+ limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
+
+ return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
+ SMP_CACHE_BYTES, goal, limit);
+}
+#endif
+
+void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ return __alloc_bootmem_nopanic(size, align, goal);
+}
+
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
+#endif
+
+/**
+ * __alloc_bootmem_low - allocate low boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
+ unsigned long goal)
+{
+ return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
+}
+
+/**
+ * __alloc_bootmem_low_node - allocate low boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may fall back to any node in the system if the specified node
+ * can not hold the requested memory.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+
+ return __alloc_memory_core_early(MAX_NUMNODES, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cdef1d4b4e4..bd7625676a6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3699,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
}
#ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Basic iterator support. Return the last range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns last region regardless of node
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+ int i;
+
+ for (i = nr_nodemap_entries - 1; i >= 0; i--)
+ if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Basic iterator support. Return the previous active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+ for (index = index - 1; index >= 0; index--)
+ if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+ return index;
+
+ return -1;
+}
+
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+ for (i = last_active_region_index_in_nid(nid); i != -1; \
+ i = previous_active_region_index_in_nid(i, nid))
+
u64 __init find_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)
{
int i;
/* Need to go over early_node_map to find out good range for node */
- for_each_active_range_index_in_nid(i, nid) {
+ for_each_active_range_index_in_nid_reverse(i, nid) {
u64 addr;
u64 ei_start, ei_last;
u64 final_start, final_end;
@@ -3748,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
return nr_range;
}
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
- u64 goal, u64 limit)
-{
- void *ptr;
- u64 addr;
-
- if (limit > memblock.current_limit)
- limit = memblock.current_limit;
-
- addr = find_memory_core_early(nid, size, align, goal, limit);
-
- if (addr == MEMBLOCK_ERROR)
- return NULL;
-
- ptr = phys_to_virt(addr);
- memset(ptr, 0, size);
- memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
- /*
- * The min_count is set to 0 so that bootmem allocated blocks
- * are never reported as leaks.
- */
- kmemleak_alloc(ptr, size, 0, 0);
- return ptr;
-}
-#endif
-
-
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
@@ -4809,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
dma_reserve = new_dma_reserve;
}
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-#ifndef CONFIG_NO_BOOTMEM
- .bdata = &bootmem_node_data[0]
-#endif
- };
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
void __init free_area_init(unsigned long *zones_size)
{
free_area_init_node(0, zones_size,
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1..941bf82e896 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
int referenced = 0;
- /*
- * Don't want to elevate referenced for mlocked page that gets this far,
- * in order that it progresses to try_to_unmap and is moved to the
- * unevictable list.
- */
- if (vma->vm_flags & VM_LOCKED) {
- *mapcount = 0; /* break early from loop */
- *vm_flags |= VM_LOCKED;
- goto out;
- }
-
- /* Pretend the page is referenced if the task has the
- swap token and is in the middle of a page fault. */
- if (mm != current->mm && has_swap_token(mm) &&
- rwsem_is_locked(&mm->mmap_sem))
- referenced++;
-
if (unlikely(PageTransHuge(page))) {
pmd_t *pmd;
spin_lock(&mm->page_table_lock);
+ /*
+ * rmap might return false positives; we must filter
+ * these out using page_check_address_pmd().
+ */
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_FLAG);
- if (pmd && !pmd_trans_splitting(*pmd) &&
- pmdp_clear_flush_young_notify(vma, address, pmd))
+ if (!pmd) {
+ spin_unlock(&mm->page_table_lock);
+ goto out;
+ }
+
+ if (vma->vm_flags & VM_LOCKED) {
+ spin_unlock(&mm->page_table_lock);
+ *mapcount = 0; /* break early from loop */
+ *vm_flags |= VM_LOCKED;
+ goto out;
+ }
+
+ /* go ahead even if the pmd is pmd_trans_splitting() */
+ if (pmdp_clear_flush_young_notify(vma, address, pmd))
referenced++;
spin_unlock(&mm->page_table_lock);
} else {
pte_t *pte;
spinlock_t *ptl;
+ /*
+ * rmap might return false positives; we must filter
+ * these out using page_check_address().
+ */
pte = page_check_address(page, mm, address, &ptl, 0);
if (!pte)
goto out;
+ if (vma->vm_flags & VM_LOCKED) {
+ pte_unmap_unlock(pte, ptl);
+ *mapcount = 0; /* break early from loop */
+ *vm_flags |= VM_LOCKED;
+ goto out;
+ }
+
if (ptep_clear_flush_young_notify(vma, address, pte)) {
/*
* Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
pte_unmap_unlock(pte, ptl);
}
+ /* Pretend the page is referenced if the task has the
+ swap token and is in the middle of a page fault. */
+ if (mm != current->mm && has_swap_token(mm) &&
+ rwsem_is_locked(&mm->mmap_sem))
+ referenced++;
+
(*mapcount)--;
if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c99060..3437b65d6d6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2144,8 +2144,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
{
struct inode *inode = dentry->d_inode;
- if (*len < 3)
+ if (*len < 3) {
+ *len = 3;
return 255;
+ }
if (inode_unhashed(inode)) {
/* Unfortunately insert_inode_hash is not idempotent,
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c5..a51d9465e62 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
-ifneq ($(CONFIG_IPV6),)
-obj-y += ipv6/
-endif
+obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb..6dee7bf648a 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
tristate "802.1d Ethernet Bridging"
select LLC
select STP
+ depends on IPV6 || IPV6=n
---help---
If you say Y here, then your Linux box will be able to act as an
Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc..6561021d22d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
+ int no_module;
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
rcu_read_unlock();
- if (!dev && capable(CAP_NET_ADMIN))
- request_module("%s", name);
+ no_module = !dev;
+ if (no_module && capable(CAP_NET_ADMIN))
+ no_module = request_module("netdev-%s", name);
+ if (no_module && capable(CAP_SYS_MODULE)) {
+ if (!request_module("%s", name))
+ pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+ }
}
EXPORT_SYMBOL(dev_load);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461..b5bada92f63 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
pkt_dev->started_at);
ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
- p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n",
+ p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
(unsigned long long)ktime_to_us(elapsed),
(unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
(unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe45445080..4c1ef026d69 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
int fd = fdp[i];
struct file *file;
- if (fd < 0 || !(file = fget(fd)))
+ if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce92..036652c8166 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ifap = &ifa->ifa_next) {
if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
- ifa->ifa_address) {
+ ifa->ifa_local) {
break; /* found */
}
}
@@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
return;
arp_send(ARPOP_REQUEST, ETH_P_ARP,
- ifa->ifa_address, dev,
- ifa->ifa_address, NULL,
+ ifa->ifa_local, dev,
+ ifa->ifa_local, NULL,
dev->dev_addr, NULL);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28..d1d0e2c256f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1765,4 +1765,4 @@ module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
-MODULE_ALIAS("gre0");
+MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54..a5f58e7cbb2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("tunl0");
+MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697b..e528a42a52b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 904312e25a3..e7db7014e89 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -739,8 +739,10 @@ restart:
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
- else
+ else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+ else
+ goto out2;
dst_release(&rt->dst);
rt = nrt ? : net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a54..d2c16e10f65 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("sit0");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421b..c47a511f203 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (conn->c_loopback
&& rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
- return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ scat = &rm->data.op_sg[sg];
+ ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
+ return ret;
}
/* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b1..bca6761a3ca 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg,
unsigned int off)
{
+ struct scatterlist *sgp = &rm->data.op_sg[sg];
+ int ret = sizeof(struct rds_header) +
+ be32_to_cpu(rm->m_inc.i_hdr.h_len);
+
/* Do not send cong updates to loopback */
if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
- return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
+ goto out;
}
BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
NULL);
rds_inc_put(&rm->m_inc);
-
- return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
+out:
+ return ret;
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164..59e599498e3 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
/*
* Mark an RPC call as having completed by clearing the 'active' bit
+ * and then waking up all tasks that were sleeping.
*/
-static void rpc_mark_complete_task(struct rpc_task *task)
+static int rpc_complete_task(struct rpc_task *task)
{
- smp_mb__before_clear_bit();
+ void *m = &task->tk_runstate;
+ wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
+ struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- smp_mb__after_clear_bit();
- wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+ ret = atomic_dec_and_test(&task->tk_count);
+ if (waitqueue_active(wq))
+ __wake_up_locked_key(wq, TASK_NORMAL, &k);
+ spin_unlock_irqrestore(&wq->lock, flags);
+ return ret;
}
/*
* Allow callers to wait for completion of an RPC call
+ *
+ * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
+ * to enforce taking of the wq->lock and hence avoid races with
+ * rpc_complete_task().
*/
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
{
if (action == NULL)
action = rpc_wait_bit_killable;
- return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+ return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
action, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
}
-void rpc_put_task(struct rpc_task *task)
+static void rpc_release_resources_task(struct rpc_task *task)
{
- if (!atomic_dec_and_test(&task->tk_count))
- return;
- /* Release resources */
if (task->tk_rqstp)
xprt_release(task);
if (task->tk_msg.rpc_cred)
put_rpccred(task->tk_msg.rpc_cred);
rpc_task_release_client(task);
- if (task->tk_workqueue != NULL) {
+}
+
+static void rpc_final_put_task(struct rpc_task *task,
+ struct workqueue_struct *q)
+{
+ if (q != NULL) {
INIT_WORK(&task->u.tk_work, rpc_async_release);
- queue_work(task->tk_workqueue, &task->u.tk_work);
+ queue_work(q, &task->u.tk_work);
} else
rpc_free_task(task);
}
+
+static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
+{
+ if (atomic_dec_and_test(&task->tk_count)) {
+ rpc_release_resources_task(task);
+ rpc_final_put_task(task, q);
+ }
+}
+
+void rpc_put_task(struct rpc_task *task)
+{
+ rpc_do_put_task(task, NULL);
+}
EXPORT_SYMBOL_GPL(rpc_put_task);
+void rpc_put_task_async(struct rpc_task *task)
+{
+ rpc_do_put_task(task, task->tk_workqueue);
+}
+EXPORT_SYMBOL_GPL(rpc_put_task_async);
+
static void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %5u release task\n", task->tk_pid);
BUG_ON (RPC_IS_QUEUED(task));
- /* Wake up anyone who is waiting for task completion */
- rpc_mark_complete_task(task);
+ rpc_release_resources_task(task);
- rpc_put_task(task);
+ /*
+ * Note: at this point we have been removed from rpc_clnt->cl_tasks,
+ * so it should be safe to use task->tk_count as a test for whether
+ * or not any other processes still hold references to our rpc_task.
+ */
+ if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
+ /* Wake up anyone who may be waiting for task completion */
+ if (!rpc_complete_task(task))
+ return;
+ } else {
+ if (!atomic_dec_and_test(&task->tk_count))
+ return;
+ }
+ rpc_final_put_task(task, task->tk_workqueue);
}
int rpciod_up(void)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912..1a10dcd999e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
p, 0, length, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
put_page(p);
+ svc_rdma_put_context(ctxt, 1);
return;
}
atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a5796..be96d429b47 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
}
xs_reclassify_socket(family, sock);
- if (xs_bind(transport, sock)) {
+ err = xs_bind(transport, sock);
+ if (err) {
sock_release(sock);
goto out;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d28620..ba5b8c20849 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* Get the parent directory, calculate the hash for last
* component.
*/
- err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
+ err = kern_path_parent(sunaddr->sun_path, &nd);
if (err)
goto out_mknod_parent;
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_namelen = 0;
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err) {
+ err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
+ goto out;
+ }
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err) {
+ err = sock_intr_errno(timeo);
+ goto out;
+ }
do {
int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
timeo = unix_stream_data_wait(sk, timeo);
- if (signal_pending(current)) {
+ if (signal_pending(current)
+ || mutex_lock_interruptible(&u->readlock)) {
err = sock_intr_errno(timeo);
goto out;
}
- mutex_lock(&u->readlock);
+
continue;
unlock:
unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828..b6f4b994eb3 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
/*
* Socket ?
*/
- if (S_ISSOCK(inode->i_mode)) {
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
struct sock *s = sock->sk;
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 6c94c6ce292..291228e2598 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -309,6 +309,11 @@ static void do_config_file(const char *filename)
close(fd);
}
+/*
+ * Important: The below generated source_foo.o and deps_foo.o variable
+ * assignments are parsed not only by make, but also by the rather simple
+ * parser in scripts/mod/sumversion.c.
+ */
static void parse_dep_file(void *map, size_t len)
{
char *m = map;
@@ -323,7 +328,6 @@ static void parse_dep_file(void *map, size_t len)
exit(1);
}
memcpy(s, m, p-m); s[p-m] = 0;
- printf("deps_%s := \\\n", target);
m = p+1;
clear_config();
@@ -343,12 +347,15 @@ static void parse_dep_file(void *map, size_t len)
strrcmp(s, "arch/um/include/uml-config.h") &&
strrcmp(s, ".ver")) {
/*
- * Do not output the first dependency (the
- * source file), so that kbuild is not confused
- * if a .c file is rewritten into .S or vice
- * versa.
+ * Do not list the source file as dependency, so that
+ * kbuild is not confused if a .c file is rewritten
+ * into .S or vice versa. Storing it in source_* is
+ * needed for modpost to compute srcversions.
*/
- if (!first)
+ if (first) {
+ printf("source_%s := %s\n\n", target, s);
+ printf("deps_%s := \\\n", target);
+ } else
printf(" %s \\\n", s);
do_config_file(s);
}
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9..58848e3e392 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
}
-# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
- if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
- ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
- }
-
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
CHK("if this code is redundant consider removing it\n" .
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index ecf9c7dc182..9dfcd6d988d 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -300,8 +300,8 @@ static int is_static_library(const char *objfile)
return 0;
}
-/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to
- * figure out source file. */
+/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line
+ * to figure out source files. */
static int parse_source_files(const char *objfile, struct md4_ctx *md)
{
char *cmd, *file, *line, *dir;
@@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
*/
while ((line = get_next_line(&pos, file, flen)) != NULL) {
char* p = line;
+
+ if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
+ p = strrchr(line, ' ');
+ if (!p) {
+ warn("malformed line: %s\n", line);
+ goto out_file;
+ }
+ p++;
+ if (!parse_file(p, md)) {
+ warn("could not open %s: %s\n",
+ p, strerror(errno));
+ goto out_file;
+ }
+ continue;
+ }
if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) {
check_files = 1;
continue;
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 44423b4dcb8..8c81d76959e 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -33,8 +33,6 @@ cmd_opcodes = {
"lockintnowait" : "6",
"lockcont" : "7",
"unlock" : "8",
- "lockbkl" : "9",
- "unlockbkl" : "10",
"signal" : "11",
"resetevent" : "98",
"reset" : "99",
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
index 8821f27cc8b..3710c8b2090 100644
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
index cde1f189a02..b4cc95975ad 100644
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ b/scripts/rt-tester/t2-l1-pi.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
index 3ab0bfc4995..1b57376cc1f 100644
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ b/scripts/rt-tester/t2-l1-signal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
index f4b5d5d6215..68b10629b6f 100644
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
index 63440ca2cce..8e6c8b11ae5 100644
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-1rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
index e5816fe67df..69c2212fc52 100644
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-2rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
index 718b82b5d3b..9b0f1eb26a8 100644
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-3rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
index c6e21356349..39ec74ab06e 100644
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ b/scripts/rt-tester/t3-l1-pi-signal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
index f53749d59d7..e03db7e010f 100644
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ b/scripts/rt-tester/t3-l1-pi-steal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
index cdc3e4fd7ba..7b59100d3e4 100644
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ b/scripts/rt-tester/t3-l2-pi.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
index baa14137f47..2f0e049d644 100644
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ b/scripts/rt-tester/t4-l2-pi-deboost.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
index e6ec0c81b54..04f4034ff89 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
index ca64f8bbf4b..a48a6ee29dd 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/security/commoncap.c b/security/commoncap.c
index 64c2ed9c901..dbfdaed4cc6 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
* Determine whether the current process may set the system clock and timezone
* information, returning 0 if permission granted, -ve if denied.
*/
-int cap_settime(struct timespec *ts, struct timezone *tz)
+int cap_settime(const struct timespec *ts, const struct timezone *tz)
{
if (!capable(CAP_SYS_TIME))
return -EPERM;
diff --git a/security/security.c b/security/security.c
index 7b7308ace8c..bb33ecadcf9 100644
--- a/security/security.c
+++ b/security/security.c
@@ -201,7 +201,7 @@ int security_syslog(int type)
return security_ops->syslog(type);
}
-int security_settime(struct timespec *ts, struct timezone *tz)
+int security_settime(const struct timespec *ts, const struct timezone *tz)
{
return security_ops->settime(ts, tz);
}
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 4bbc3442703..8dfb0a0da67 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
SOC_SINGLE("DAC Playback Limiter Threshold",
WM8978_DAC_LIMITER_2, 4, 7, 0),
SOC_SINGLE("DAC Playback Limiter Boost",
- WM8978_DAC_LIMITER_2, 0, 15, 0),
+ WM8978_DAC_LIMITER_2, 0, 12, 0),
SOC_ENUM("ALC Enable Switch", alc1),
SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0),
SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0),
- SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0),
+ SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0),
SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0),
SOC_ENUM("ALC Capture Mode", alc3),
- SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0),
- SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0),
+ SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0),
+ SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0),
SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0),
SOC_SINGLE("ALC Capture Noise Gate Threshold",
@@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1),
/* DAC / ADC oversampling */
- SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0),
- SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0),
+ SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL,
+ 5, 1, 0),
+ SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL,
+ 5, 1, 0),
};
/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 4afbe3b2e44..c6c958ee5d5 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -1418,7 +1418,7 @@ SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0,
static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = {
SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
-SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
+SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
};
@@ -3325,6 +3325,12 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
case WM8958:
snd_soc_add_controls(codec, wm8958_snd_controls,
ARRAY_SIZE(wm8958_snd_controls));
+ snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
+ ARRAY_SIZE(wm8994_lateclk_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
+ ARRAY_SIZE(wm8994_adc_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
+ ARRAY_SIZE(wm8994_dac_widgets));
snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
ARRAY_SIZE(wm8958_dapm_widgets));
break;
@@ -3350,6 +3356,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
}
break;
case WM8958:
+ snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
+ ARRAY_SIZE(wm8994_lateclk_intercon));
snd_soc_dapm_add_routes(dapm, wm8958_intercon,
ARRAY_SIZE(wm8958_intercon));
break;
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index 161750443eb..73dde4a1adc 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = {
.cpu_dai_name ="omap-mcbsp-dai.0",
.codec_dai_name = "tlv320aic23-hifi",
.platform_name = "omap-pcm-audio",
- .codec_name = "tlv320aic23-codec",
+ .codec_name = "tlv320aic23-codec.2-001a",
.init = am3517evm_aic23_init,
.ops = &am3517evm_ops,
};
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 25e54230cc6..1790f83ee66 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -941,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm,
}
if (!list_empty(&pending))
- dapm_seq_run_coalesced(dapm, &pending);
+ dapm_seq_run_coalesced(cur_dapm, &pending);
}
static void dapm_widget_update(struct snd_soc_dapm_context *dapm)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 5a72d421e21..e5230c0ef95 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -189,11 +189,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms)
{
const size_t size = PATH_MAX;
- char *realname = realpath(name, NULL),
- *filename = malloc(size),
+ char *realname, *filename = malloc(size),
*linkname = malloc(size), *targetname;
int len, err = -1;
+ if (is_kallsyms)
+ realname = (char *)name;
+ else
+ realname = realpath(name, NULL);
+
if (realname == NULL || filename == NULL || linkname == NULL)
goto out_free;
@@ -225,7 +229,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
if (symlink(targetname, linkname) == 0)
err = 0;
out_free:
- free(realname);
+ if (!is_kallsyms)
+ free(realname);
free(filename);
free(linkname);
return err;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 17f9c4a66dd..194f9e2a328 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1328,7 +1328,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
* Continue if no error, because the lazy pattern will match
* to other lines
*/
- return ret < 0 ?: 0;
+ return ret < 0 ? ret : 0;
}
/* Find probe points from lazy pattern */