summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--Documentation/00-INDEX6
-rw-r--r--Documentation/bus-virt-phys-mapping.txt (renamed from Documentation/IO-mapping.txt)0
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/parisc/kernel/ftrace.c4
-rw-r--r--arch/powerpc/include/asm/cputable.h3
-rw-r--r--arch/powerpc/kernel/process.c11
-rw-r--r--arch/sparc/configs/sparc64_defconfig49
-rw-r--r--arch/sparc/include/asm/cache.h1
-rw-r--r--arch/sparc/include/asm/pgtable_32.h5
-rw-r--r--arch/sparc/kernel/perf_event.c1
-rw-r--r--arch/sparc/kernel/sun4d_irq.c2
-rw-r--r--arch/sparc/kernel/ttable.S2
-rw-r--r--arch/sparc/mm/srmmu.c2
-rw-r--r--arch/sparc/mm/sun4c.c3
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/kprobes.c2
-rw-r--r--arch/x86/kernel/quirks.c5
-rw-r--r--arch/x86/kernel/setup_percpu.c17
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/pci/i386.c1
-rw-r--r--arch/x86/pci/mrst.c7
-rw-r--r--drivers/clocksource/cs5535-clockevt.c2
-rw-r--r--drivers/edac/Kconfig2
-rw-r--r--drivers/edac/mpc85xx_edac.c1
-rw-r--r--drivers/gpio/cs5535-gpio.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c13
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h64
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c33
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c3
-rw-r--r--drivers/gpu/drm/radeon/r100.c1
-rw-r--r--drivers/gpu/drm/radeon/r300.c3
-rw-r--r--drivers/gpu/drm/radeon/r520.c1
-rw-r--r--drivers/gpu/drm/radeon/r600.c1
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c20
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c23
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_tv.c4
-rw-r--r--drivers/gpu/drm/radeon/rs400.c5
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv515.c1
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c1
-rw-r--r--drivers/misc/cs5535-mfgpt.c2
-rw-r--r--drivers/mmc/host/sdhci-s3c.c20
-rw-r--r--drivers/net/ibmveth.c4
-rw-r--r--drivers/net/pcmcia/axnet_cs.c7
-rw-r--r--drivers/net/r8169.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/hif_usb.c8
-rw-r--r--drivers/net/wireless/hostap/hostap_pci.c1
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-sta.h11
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00dev.c10
-rw-r--r--drivers/pci/setup-res.c32
-rw-r--r--drivers/pcmcia/pcmcia_resource.c4
-rw-r--r--drivers/platform/x86/intel_scu_ipc.c12
-rw-r--r--drivers/power/ds2782_battery.c2
-rw-r--r--drivers/s390/block/dasd_devmap.c4
-rw-r--r--drivers/s390/cio/chsc.c2
-rw-r--r--drivers/sbus/char/openprom.c4
-rw-r--r--drivers/serial/suncore.c4
-rw-r--r--drivers/serial/sunsu.c13
-rw-r--r--drivers/usb/gadget/f_fs.c2
-rw-r--r--drivers/vhost/net.c13
-rw-r--r--drivers/video/aty/radeon_pm.c2
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/ioctl.c20
-rw-r--r--fs/ceph/auth_x.c3
-rw-r--r--fs/ceph/mds_client.c35
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/messenger.c71
-rw-r--r--fs/ceph/osdmap.c1
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd2/journal.c15
-rw-r--r--fs/jbd2/transaction.c9
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/mbcache.c5
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/ocfs2/aops.c94
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c22
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c309
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/journal.c30
-rw-r--r--fs/ocfs2/localalloc.c7
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/xattr.c200
-rw-r--r--fs/partitions/ibm.c14
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c130
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h3
-rw-r--r--fs/xfs/quota/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--include/linux/cpu.h25
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/fb.h2
-rw-r--r--include/linux/fdtable.h3
-rw-r--r--include/linux/jbd2.h11
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/linux/sched.h53
-rw-r--r--include/linux/topology.h1
-rw-r--r--include/linux/vgaarb.h1
-rw-r--r--include/math-emu/op-common.h2
-rw-r--r--include/net/sock.h7
-rw-r--r--ipc/sem.c46
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/cpuset.c21
-rw-r--r--kernel/early_res.c6
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/hrtimer.c8
-rw-r--r--kernel/lockdep.c2
-rw-r--r--kernel/perf_event.c2
-rw-r--r--kernel/posix-cpu-timers.c36
-rw-r--r--kernel/rcutorture.c3
-rw-r--r--kernel/sched.c381
-rw-r--r--kernel/sched_clock.c95
-rw-r--r--kernel/sched_cpupri.c8
-rw-r--r--kernel/sched_cpupri.h2
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c532
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--kernel/sched_stats.h27
-rw-r--r--kernel/time/tick-sched.c8
-rw-r--r--kernel/timer.c8
-rw-r--r--kernel/trace/trace_clock.c2
-rw-r--r--kernel/workqueue_sched.h16
-rw-r--r--mm/bootmem.c24
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/page_cgroup.c7
-rw-r--r--mm/vmscan.c10
-rw-r--r--net/bluetooth/hci_conn.c5
-rw-r--r--net/bluetooth/hci_event.c2
-rw-r--r--net/bluetooth/l2cap.c14
-rw-r--r--net/bridge/br_device.c9
-rw-r--r--net/bridge/br_forward.c23
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/neighbour.c5
-rw-r--r--net/dsa/Kconfig2
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv6/mip6.c3
-rw-r--r--net/phonet/pep.c1
-rw-r--r--net/sched/act_nat.c5
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/xfrm/xfrm_policy.c15
-rw-r--r--sound/soc/codecs/Kconfig4
-rw-r--r--sound/soc/codecs/wm8727.c2
-rw-r--r--sound/soc/codecs/wm8776.c1
-rw-r--r--sound/soc/codecs/wm8988.c1
-rw-r--r--sound/soc/sh/fsi.c27
-rw-r--r--tools/perf/arch/sparc/Makefile4
-rw-r--r--tools/perf/arch/sparc/util/dwarf-regs.c43
171 files changed, 2270 insertions, 984 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index dd10b51b4e6..5405f7aecef 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -32,8 +32,6 @@ DocBook/
- directory with DocBook templates etc. for kernel documentation.
HOWTO
- the process and procedures of how to do Linux kernel development.
-IO-mapping.txt
- - how to access I/O mapped memory from within device drivers.
IPMI.txt
- info on Linux Intelligent Platform Management Interface (IPMI) Driver.
IRQ-affinity.txt
@@ -84,6 +82,8 @@ blockdev/
- info on block devices & drivers
btmrvl.txt
- info on Marvell Bluetooth driver usage.
+bus-virt-phys-mapping.txt
+ - how to access I/O mapped memory from within device drivers.
cachetlb.txt
- describes the cache/TLB flushing interfaces Linux uses.
cdrom/
@@ -168,6 +168,8 @@ initrd.txt
- how to use the RAM disk as an initial/temporary root filesystem.
input/
- info on Linux input device support.
+io-mapping.txt
+ - description of io_mapping functions in linux/io-mapping.h
io_ordering.txt
- info on ordering I/O writes to memory-mapped addresses.
ioctl/
diff --git a/Documentation/IO-mapping.txt b/Documentation/bus-virt-phys-mapping.txt
index 1b5aa10df84..1b5aa10df84 100644
--- a/Documentation/IO-mapping.txt
+++ b/Documentation/bus-virt-phys-mapping.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index 58848125b8b..db3d0f5061f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5336,6 +5336,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
S: Maintained
F: arch/sparc/
+F: drivers/sbus
SPARC SERIAL DRIVERS
M: "David S. Miller" <davem@davemloft.net>
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 9877372ffdb..5beb97bafbb 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
unsigned long ret;
pop_return_trace(&trace, &ret);
- trace.rettime = cpu_clock(raw_smp_processor_id());
+ trace.rettime = local_clock();
ftrace_graph_return(&trace);
if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- calltime = cpu_clock(raw_smp_processor_id());
+ calltime = local_clock();
if (push_return_trace(old, calltime,
self_addr, &trace.depth) == -EBUSY) {
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b0b21134f61..4b611ca1a76 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -197,6 +197,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
#ifndef __ASSEMBLY__
@@ -412,7 +413,7 @@ extern const char *powerpc_base_platform;
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_SAO)
+ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 773424df828..43855c9f84d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1263,3 +1263,14 @@ unsigned long randomize_et_dyn(unsigned long base)
return ret;
}
+
+#ifdef CONFIG_SMP
+int arch_sd_sibling_asym_packing(void)
+{
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ return SD_ASYM_PACKING;
+ }
+ return 0;
+}
+#endif
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 259e3fd5099..1dc07a0014c 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.34-rc3
-# Sat Apr 3 15:49:56 2010
+# Linux kernel version: 2.6.34
+# Wed May 26 21:14:01 2010
#
CONFIG_64BIT=y
CONFIG_SPARC=y
@@ -107,10 +107,9 @@ CONFIG_PERF_COUNTERS=y
# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_PCI_QUIRKS=y
-CONFIG_SLUB_DEBUG=y
# CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
CONFIG_TRACEPOINTS=y
@@ -239,6 +238,7 @@ CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_COMPACTION is not set
CONFIG_MIGRATION=y
CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_ZONE_DMA_FLAG=0
@@ -351,6 +351,7 @@ CONFIG_IPV6_TUNNEL=m
# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
+# CONFIG_L2TP is not set
# CONFIG_BRIDGE is not set
# CONFIG_NET_DSA is not set
CONFIG_VLAN_8021Q=m
@@ -367,6 +368,7 @@ CONFIG_VLAN_8021Q=m
# CONFIG_IEEE802154 is not set
# CONFIG_NET_SCHED is not set
# CONFIG_DCB is not set
+CONFIG_RPS=y
#
# Network testing
@@ -386,9 +388,14 @@ CONFIG_WIRELESS=y
#
# CFG80211 needs to be enabled for MAC80211
#
+
+#
+# Some wireless drivers require a rate control algorithm
+#
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
+# CONFIG_CAIF is not set
#
# Device Drivers
@@ -658,6 +665,7 @@ CONFIG_PHYLIB=m
# CONFIG_NATIONAL_PHY is not set
# CONFIG_STE10XP is not set
# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_MICREL_PHY is not set
# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=m
@@ -734,6 +742,8 @@ CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_CHELSIO_T3 is not set
+CONFIG_CHELSIO_T4_DEPENDS=y
+# CONFIG_CHELSIO_T4 is not set
# CONFIG_ENIC is not set
# CONFIG_IXGBE is not set
# CONFIG_IXGBEVF is not set
@@ -766,6 +776,7 @@ CONFIG_NIU=m
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
+# CONFIG_USB_IPHETH is not set
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
@@ -778,7 +789,6 @@ CONFIG_PPP_DEFLATE=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_MPPE=m
CONFIG_PPPOE=m
-# CONFIG_PPPOL2TP is not set
# CONFIG_SLIP is not set
CONFIG_SLHC=m
# CONFIG_NET_FC is not set
@@ -816,6 +826,7 @@ CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_QT2160 is not set
CONFIG_KEYBOARD_LKKBD=m
+# CONFIG_KEYBOARD_TCA6416 is not set
# CONFIG_KEYBOARD_MAX7359 is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_OPENCORES is not set
@@ -840,6 +851,7 @@ CONFIG_MOUSE_SERIAL=y
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_AD714X is not set
CONFIG_INPUT_SPARCSPKR=y
# CONFIG_INPUT_ATI_REMOTE is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
@@ -848,6 +860,7 @@ CONFIG_INPUT_SPARCSPKR=y
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set
+# CONFIG_INPUT_PCF8574 is not set
#
# Hardware I/O ports
@@ -871,6 +884,7 @@ CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_DEVKMEM is not set
# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_N_GSM is not set
# CONFIG_NOZOMI is not set
#
@@ -893,6 +907,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
# CONFIG_SERIAL_TIMBERDALE is not set
# CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
# CONFIG_LEGACY_PTYS is not set
@@ -1306,11 +1322,14 @@ CONFIG_USB_HIDDEV=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
+# CONFIG_HID_CANDO is not set
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
+# CONFIG_HID_PRODIKEYS is not set
CONFIG_HID_CYPRESS=y
CONFIG_HID_DRAGONRISE=y
# CONFIG_DRAGONRISE_FF is not set
+# CONFIG_HID_EGALAX is not set
CONFIG_HID_EZKEY=y
CONFIG_HID_KYE=y
CONFIG_HID_GYRATION=y
@@ -1328,7 +1347,9 @@ CONFIG_HID_ORTEK=y
CONFIG_HID_PANTHERLORD=y
# CONFIG_PANTHERLORD_FF is not set
CONFIG_HID_PETALYNX=y
+# CONFIG_HID_PICOLCD is not set
# CONFIG_HID_QUANTA is not set
+# CONFIG_HID_ROCCAT_KONE is not set
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
# CONFIG_HID_STANTUM is not set
@@ -1342,6 +1363,7 @@ CONFIG_HID_THRUSTMASTER=y
# CONFIG_THRUSTMASTER_FF is not set
CONFIG_HID_ZEROPLUS=y
# CONFIG_ZEROPLUS_FF is not set
+# CONFIG_HID_ZYDACRON is not set
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1356,7 +1378,6 @@ CONFIG_USB=y
# CONFIG_USB_DEVICEFS is not set
# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_OTG is not set
# CONFIG_USB_MON is not set
# CONFIG_USB_WUSB is not set
# CONFIG_USB_WUSB_CBAF is not set
@@ -1521,10 +1542,6 @@ CONFIG_RTC_DRV_STARFIRE=y
# CONFIG_DMADEVICES is not set
# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
# CONFIG_STAGING is not set
#
@@ -1706,8 +1723,8 @@ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
CONFIG_SCHEDSTATS=y
# CONFIG_TIMER_STATS is not set
# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_KMEMLEAK is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
# CONFIG_DEBUG_SPINLOCK is not set
@@ -1742,6 +1759,9 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_NOP_TRACER=y
CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
@@ -1769,12 +1789,12 @@ CONFIG_BLK_DEV_IO_TRACE=y
# CONFIG_RING_BUFFER_BENCHMARK is not set
# CONFIG_DYNAMIC_DEBUG is not set
# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_DCFLUSH is not set
-# CONFIG_STACK_DEBUG is not set
# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set
#
@@ -1895,6 +1915,7 @@ CONFIG_CRYPTO_DEFLATE=y
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_DEV_NIAGARA2 is not set
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_BINARY_PRINTF=y
diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
index 0588b8c7faa..69358b590c9 100644
--- a/arch/sparc/include/asm/cache.h
+++ b/arch/sparc/include/asm/cache.h
@@ -11,7 +11,6 @@
#define L1_CACHE_SHIFT 5
#define L1_CACHE_BYTES 32
-#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))
#ifdef CONFIG_SPARC32
#define SMP_CACHE_BYTES_SHIFT 5
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 77f906d8cc2..0ece77f4775 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -142,13 +142,12 @@ BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t)
#define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd)
#define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd)
-BTFIXUPDEF_SETHI(none_mask)
BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t)
BTFIXUPDEF_CALL(void, pte_clear, pte_t *)
static inline int pte_none(pte_t pte)
{
- return !(pte_val(pte) & ~BTFIXUP_SETHI(none_mask));
+ return !pte_val(pte);
}
#define pte_present(pte) BTFIXUP_CALL(pte_present)(pte)
@@ -160,7 +159,7 @@ BTFIXUPDEF_CALL(void, pmd_clear, pmd_t *)
static inline int pmd_none(pmd_t pmd)
{
- return !(pmd_val(pmd) & ~BTFIXUP_SETHI(none_mask));
+ return !pmd_val(pmd);
}
#define pmd_bad(pmd) BTFIXUP_CALL(pmd_bad)(pmd)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0ec92c8861d..44faabc3c02 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -657,6 +657,7 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
+ pcr &= ~mask_for_index(idx);
pcr |= event_encoding(enc, idx);
}
out:
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index ab036a72de5..e11b4612dab 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -183,7 +183,7 @@ void sun4d_free_irq(unsigned int irq, void *dev_id)
goto out_unlock;
}
- if (action && tmp)
+ if (tmp)
tmp->next = action->next;
else
*actionp = action->next;
diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S
index 76d837fc47d..c6dfdaa29e2 100644
--- a/arch/sparc/kernel/ttable.S
+++ b/arch/sparc/kernel/ttable.S
@@ -64,7 +64,7 @@ tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6)
tl0_irq6: BTRAP(0x46)
#endif
tl0_irq7: TRAP_IRQ(deferred_pcr_work_irq, 7)
-#ifdef CONFIG_KGDB
+#if defined(CONFIG_KGDB) && defined(CONFIG_SMP)
tl0_irq8: TRAP_IRQ(smp_kgdb_capture_client, 8)
#else
tl0_irq8: BTRAP(0x48)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index f5f75a58e0b..b0b43aa5e45 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -2215,8 +2215,6 @@ void __init ld_mmu_srmmu(void)
BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM);
- BTFIXUPSET_SETHI(none_mask, 0xF0000000);
-
BTFIXUPSET_CALL(pte_present, srmmu_pte_present, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_SWAPO0G0);
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index cf38846753d..4289f90f869 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2087,9 +2087,6 @@ void __init ld_mmu_sun4c(void)
BTFIXUPSET_CALL(set_pte, sun4c_set_pte, BTFIXUPCALL_STO1O0);
- /* The 2.4.18 code does not set this on sun4c, how does it work? XXX */
- /* BTFIXUPSET_SETHI(none_mask, 0x00000000); */ /* Defaults to zero? */
-
BTFIXUPSET_CALL(pte_pfn, sun4c_pte_pfn, BTFIXUPCALL_NORM);
#if 0 /* PAGE_SHIFT <= 12 */ /* Eek. Investigate. XXX */
BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_ANDNINT(PAGE_SIZE - 1));
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c02cc692985..a96489ee6ca 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -921,7 +921,7 @@ void disable_local_APIC(void)
unsigned int value;
/* APIC hasn't been mapped yet */
- if (!apic_phys)
+ if (!x2apic_mode && !apic_phys)
return;
clear_local_APIC();
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf268..e5cc7e82e60 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,6 +18,7 @@
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/gart.h>
+#include <asm/hpet.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
+/*
+ * Force the read back of the CMP register in hpet_next_event()
+ * to work around the problem that the CMP register write seems to be
+ * delayed. See hpet_next_event() for details.
+ *
+ * We do this on all SMBUS incarnations for now until we have more
+ * information about the affected chipsets.
+ */
+static void __init ati_hpet_bugs(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+ hpet_readback_cmp = 1;
+#endif
+}
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
+ { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+ PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
{}
};
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 345a4b1fe14..675879b65ce 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -640,8 +640,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* Skip cs, ip, orig_ax and gs. */ \
" subl $16, %esp\n" \
" pushl %fs\n" \
- " pushl %ds\n" \
" pushl %es\n" \
+ " pushl %ds\n" \
" pushl %eax\n" \
" pushl %ebp\n" \
" pushl %edi\n" \
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e72d3fc6547..939b9e98245 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -498,15 +498,10 @@ void force_hpet_resume(void)
* See erratum #27 (Misinterpreted MSI Requests May Result in
* Corrupted LPC DMA Data) in AMD Publication #46837,
* "SB700 Family Product Errata", Rev. 1.0, March 2010.
- *
- * Also force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
*/
static void force_disable_hpet_msi(struct pci_dev *unused)
{
hpet_msi_disable = 1;
- hpet_readback_cmp = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index de3b63ae3da..a60df9ae645 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -238,6 +238,15 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
+ /*
+ * Ensure that the boot cpu numa_node is correct when the boot
+ * cpu is on a node that doesn't have memory installed.
+ * Also cpu_up() will call cpu_to_node() for APs when
+ * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
+ * up later with c_init aka intel_init/amd_init.
+ * So set them all (boot cpu and all APs).
+ */
+ set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
#endif
/*
@@ -257,14 +266,6 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
- /*
- * make sure boot cpu numa_node is right, when boot cpu is on the
- * node that doesn't have mem installed
- */
- set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
-#endif
-
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3699613e883..b1ed0a1a591 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
return kvm_mmu_zap_page(kvm, page) + 1;
}
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
struct kvm *kvm;
struct kvm *kvm_freed = NULL;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6fdb3ec30c3..55253095be8 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass)
idx, r, disabled, pass);
if (pci_claim_resource(dev, idx) < 0) {
/* We'll assign a new address later */
+ dev->fw_addr[idx] = r->start;
r->end -= r->start;
r->start = 0;
}
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 7ef3a2735df..cb29191cee5 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
devfn, pos, 4, &pcie_cap))
return 0;
- if (pcie_cap == 0xffffffff)
- return 0;
+ if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 ||
+ PCI_EXT_CAP_ID(pcie_cap) == 0xffff)
+ break;
if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
@@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
return pos;
}
- pos = pcie_cap >> 20;
+ pos = PCI_EXT_CAP_NEXT(pcie_cap);
}
return 0;
diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index d7be69f1315..b7dab32ce63 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -194,6 +194,6 @@ err_timer:
module_init(cs5535_mfgpt_init);
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
MODULE_DESCRIPTION("CS5535/CS5536 MFGPT clock event driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index aedef7941b2..0d2f9dbb47e 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -209,7 +209,7 @@ config EDAC_I5100
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
- depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
+ depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
help
Support for error detection and correction on the Freescale
MPC8349, MPC8560, MPC8540, MPC8548
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 52ca09bf472..f39b00a46ed 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -1120,6 +1120,7 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
{ .compatible = "fsl,mpc8555-memory-controller", },
{ .compatible = "fsl,mpc8560-memory-controller", },
{ .compatible = "fsl,mpc8568-memory-controller", },
+ { .compatible = "fsl,mpc8569-memory-controller", },
{ .compatible = "fsl,mpc8572-memory-controller", },
{ .compatible = "fsl,mpc8349-memory-controller", },
{ .compatible = "fsl,p2020-memory-controller", },
diff --git a/drivers/gpio/cs5535-gpio.c b/drivers/gpio/cs5535-gpio.c
index f73a1555e49..e23c06893d1 100644
--- a/drivers/gpio/cs5535-gpio.c
+++ b/drivers/gpio/cs5535-gpio.c
@@ -352,6 +352,6 @@ static void __exit cs5535_gpio_exit(void)
module_init(cs5535_gpio_init);
module_exit(cs5535_gpio_exit);
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
MODULE_DESCRIPTION("AMD CS5535/CS5536 GPIO driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 074385882cc..51bd301cf10 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2241,6 +2241,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj,
page = read_cache_page_gfp(mapping, i,
GFP_HIGHUSER |
__GFP_COLD |
+ __GFP_RECLAIMABLE |
gfpmask);
if (IS_ERR(page))
goto err_pages;
@@ -4741,6 +4742,16 @@ i915_gem_load(struct drm_device *dev)
list_add(&dev_priv->mm.shrink_list, &shrink_list);
spin_unlock(&shrink_list_lock);
+ /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
+ if (IS_GEN3(dev)) {
+ u32 tmp = I915_READ(MI_ARB_STATE);
+ if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
+ /* arb state is a masked write, so set bit + bit in mask */
+ tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
+ I915_WRITE(MI_ARB_STATE, tmp);
+ }
+ }
+
/* Old X drivers will take 0-2 for front, back, depth buffers */
if (!drm_core_check_feature(dev, DRIVER_MODESET))
dev_priv->fence_reg_start = 3;
@@ -4977,7 +4988,7 @@ i915_gpu_is_active(struct drm_device *dev)
}
static int
-i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
drm_i915_private_t *dev_priv, *next_dev;
struct drm_i915_gem_object *obj_priv, *next_obj;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 150400f4053..6d9b0288272 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -359,6 +359,70 @@
#define LM_BURST_LENGTH 0x00000700
#define LM_FIFO_WATERMARK 0x0000001F
#define MI_ARB_STATE 0x020e4 /* 915+ only */
+#define MI_ARB_MASK_SHIFT 16 /* shift for enable bits */
+
+/* Make render/texture TLB fetches lower priority than associated data
+ * fetches. This is not turned on by default
+ */
+#define MI_ARB_RENDER_TLB_LOW_PRIORITY (1 << 15)
+
+/* Isoch request wait on GTT enable (Display A/B/C streams).
+ * Make isoch requests stall on the TLB update. May cause
+ * display underruns (test mode only)
+ */
+#define MI_ARB_ISOCH_WAIT_GTT (1 << 14)
+
+/* Block grant count for isoch requests when block count is
+ * set to a finite value.
+ */
+#define MI_ARB_BLOCK_GRANT_MASK (3 << 12)
+#define MI_ARB_BLOCK_GRANT_8 (0 << 12) /* for 3 display planes */
+#define MI_ARB_BLOCK_GRANT_4 (1 << 12) /* for 2 display planes */
+#define MI_ARB_BLOCK_GRANT_2 (2 << 12) /* for 1 display plane */
+#define MI_ARB_BLOCK_GRANT_0 (3 << 12) /* don't use */
+
+/* Enable render writes to complete in C2/C3/C4 power states.
+ * If this isn't enabled, render writes are prevented in low
+ * power states. That seems bad to me.
+ */
+#define MI_ARB_C3_LP_WRITE_ENABLE (1 << 11)
+
+/* This acknowledges an async flip immediately instead
+ * of waiting for 2 TLB fetches.
+ */
+#define MI_ARB_ASYNC_FLIP_ACK_IMMEDIATE (1 << 10)
+
+/* Enables non-sequential data reads through arbiter
+ */
+#define MI_ARB_DUAL_DATA_PHASE_DISABLE (1 << 9)
+
+/* Disable FSB snooping of cacheable write cycles from binner/render
+ * command stream
+ */
+#define MI_ARB_CACHE_SNOOP_DISABLE (1 << 8)
+
+/* Arbiter time slice for non-isoch streams */
+#define MI_ARB_TIME_SLICE_MASK (7 << 5)
+#define MI_ARB_TIME_SLICE_1 (0 << 5)
+#define MI_ARB_TIME_SLICE_2 (1 << 5)
+#define MI_ARB_TIME_SLICE_4 (2 << 5)
+#define MI_ARB_TIME_SLICE_6 (3 << 5)
+#define MI_ARB_TIME_SLICE_8 (4 << 5)
+#define MI_ARB_TIME_SLICE_10 (5 << 5)
+#define MI_ARB_TIME_SLICE_14 (6 << 5)
+#define MI_ARB_TIME_SLICE_16 (7 << 5)
+
+/* Low priority grace period page size */
+#define MI_ARB_LOW_PRIORITY_GRACE_4KB (0 << 4) /* default */
+#define MI_ARB_LOW_PRIORITY_GRACE_8KB (1 << 4)
+
+/* Disable display A/B trickle feed */
+#define MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE (1 << 2)
+
+/* Set display plane priority */
+#define MI_ARB_DISPLAY_PRIORITY_A_B (0 << 0) /* display A > display B */
+#define MI_ARB_DISPLAY_PRIORITY_B_A (1 << 0) /* display B > display A */
+
#define CACHE_MODE_0 0x02120 /* 915+ only */
#define CM0_MASK_SHIFT 16
#define CM0_IZ_OPT_DISABLE (1<<6)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index fc924b64919..e492919faf4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -203,36 +203,26 @@ struct methods {
const bool rw;
};
-static struct methods nv04_methods[] = {
- { "PROM", load_vbios_prom, false },
- { "PRAMIN", load_vbios_pramin, true },
- { "PCIROM", load_vbios_pci, true },
-};
-
-static struct methods nv50_methods[] = {
- { "ACPI", load_vbios_acpi, true },
+static struct methods shadow_methods[] = {
{ "PRAMIN", load_vbios_pramin, true },
{ "PROM", load_vbios_prom, false },
{ "PCIROM", load_vbios_pci, true },
+ { "ACPI", load_vbios_acpi, true },
};
-#define METHODCNT 3
-
static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
{
- struct drm_nouveau_private *dev_priv = dev->dev_private;
- struct methods *methods;
- int i;
+ const int nr_methods = ARRAY_SIZE(shadow_methods);
+ struct methods *methods = shadow_methods;
int testscore = 3;
- int scores[METHODCNT];
+ int scores[nr_methods], i;
if (nouveau_vbios) {
- methods = nv04_methods;
- for (i = 0; i < METHODCNT; i++)
+ for (i = 0; i < nr_methods; i++)
if (!strcasecmp(nouveau_vbios, methods[i].desc))
break;
- if (i < METHODCNT) {
+ if (i < nr_methods) {
NV_INFO(dev, "Attempting to use BIOS image from %s\n",
methods[i].desc);
@@ -244,12 +234,7 @@ static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
NV_ERROR(dev, "VBIOS source \'%s\' invalid\n", nouveau_vbios);
}
- if (dev_priv->card_type < NV_50)
- methods = nv04_methods;
- else
- methods = nv50_methods;
-
- for (i = 0; i < METHODCNT; i++) {
+ for (i = 0; i < nr_methods; i++) {
NV_TRACE(dev, "Attempting to load BIOS image from %s\n",
methods[i].desc);
data[0] = data[1] = 0; /* avoid reuse of previous image */
@@ -260,7 +245,7 @@ static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
}
while (--testscore > 0) {
- for (i = 0; i < METHODCNT; i++) {
+ for (i = 0; i < nr_methods; i++) {
if (scores[i] == testscore) {
NV_TRACE(dev, "Using BIOS image from %s\n",
methods[i].desc);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index c9a4a0d2a11..257ea130ae1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -387,7 +387,8 @@ int nouveau_fbcon_init(struct drm_device *dev)
dev_priv->nfbdev = nfbdev;
nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs;
- ret = drm_fb_helper_init(dev, &nfbdev->helper, 2, 4);
+ ret = drm_fb_helper_init(dev, &nfbdev->helper,
+ nv_two_heads(dev) ? 2 : 1, 4);
if (ret) {
kfree(nfbdev);
return ret;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 3970e62eaab..aab5ba040bd 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2354,6 +2354,7 @@ void r100_mc_init(struct radeon_device *rdev)
if (rdev->flags & RADEON_IS_IGP)
base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = 0;
if (!(rdev->flags & RADEON_IS_AGP))
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 7e81db5eb80..19a7ef7ee34 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -481,6 +481,7 @@ void r300_mc_init(struct radeon_device *rdev)
if (rdev->flags & RADEON_IS_IGP)
base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = 0;
if (!(rdev->flags & RADEON_IS_AGP))
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
@@ -1176,6 +1177,8 @@ int r300_cs_parse(struct radeon_cs_parser *p)
int r;
track = kzalloc(sizeof(*track), GFP_KERNEL);
+ if (track == NULL)
+ return -ENOMEM;
r100_cs_track_clear(p->rdev, track);
p->track = track;
do {
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 34330df2848..694af7cc23a 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -125,6 +125,7 @@ void r520_mc_init(struct radeon_device *rdev)
r520_vram_get_type(rdev);
r100_vram_init_sizes(rdev);
radeon_vram_location(rdev, &rdev->mc, 0);
+ rdev->mc.gtt_base_align = 0;
if (!(rdev->flags & RADEON_IS_AGP))
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 3d6645ce215..e100f69faee 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1179,6 +1179,7 @@ void r600_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
if (rdev->flags & RADEON_IS_IGP)
base = (RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = 0;
radeon_gtt_location(rdev, mc);
}
}
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index f4fb88ece2b..ca5c29f7077 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -538,9 +538,12 @@ int
r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
+ int ret;
DRM_DEBUG("\n");
- r600_nomm_get_vb(dev);
+ ret = r600_nomm_get_vb(dev);
+ if (ret)
+ return ret;
dev_priv->blit_vb->file_priv = file_priv;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index ab61aaa887b..2f94dc66c18 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -351,6 +351,7 @@ struct radeon_mc {
int vram_mtrr;
bool vram_is_ddr;
bool igp_sideport_enabled;
+ u64 gtt_base_align;
};
bool radeon_combios_sideport_present(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 99bd8a9c56b..10673ae59cf 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -280,6 +280,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
}
}
+ /* ASUS HD 3600 board lists the DVI port as HDMI */
+ if ((dev->pdev->device == 0x9598) &&
+ (dev->pdev->subsystem_vendor == 0x1043) &&
+ (dev->pdev->subsystem_device == 0x01e4)) {
+ if (*connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+ *connector_type = DRM_MODE_CONNECTOR_DVII;
+ }
+ }
+
/* ASUS HD 3450 board lists the DVI port as HDMI */
if ((dev->pdev->device == 0x95C5) &&
(dev->pdev->subsystem_vendor == 0x1043) &&
@@ -1029,8 +1038,15 @@ bool radeon_atombios_sideport_present(struct radeon_device *rdev)
data_offset);
switch (crev) {
case 1:
- if (igp_info->info.ucMemoryType & 0xf0)
- return true;
+ /* AMD IGPS */
+ if ((rdev->family == CHIP_RS690) ||
+ (rdev->family == CHIP_RS740)) {
+ if (igp_info->info.ulBootUpMemoryClock)
+ return true;
+ } else {
+ if (igp_info->info.ucMemoryType & 0xf0)
+ return true;
+ }
break;
case 2:
if (igp_info->info_2.ucMemoryType & 0x0f)
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index f58f8bd8f77..adccbc2c202 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -771,14 +771,14 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
} else
ret = connector_status_connected;
- /* multiple connectors on the same encoder with the same ddc line
- * This tends to be HDMI and DVI on the same encoder with the
- * same ddc line. If the edid says HDMI, consider the HDMI port
- * connected and the DVI port disconnected. If the edid doesn't
- * say HDMI, vice versa.
+ /* This gets complicated. We have boards with VGA + HDMI with a
+ * shared DDC line and we have boards with DVI-D + HDMI with a shared
+ * DDC line. The latter is more complex because with DVI<->HDMI adapters
+ * you don't really know what's connected to which port as both are digital.
*/
if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
struct drm_device *dev = connector->dev;
+ struct radeon_device *rdev = dev->dev_private;
struct drm_connector *list_connector;
struct radeon_connector *list_radeon_connector;
list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
@@ -788,15 +788,10 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
if (list_radeon_connector->shared_ddc &&
(list_radeon_connector->ddc_bus->rec.i2c_id ==
radeon_connector->ddc_bus->rec.i2c_id)) {
- if (drm_detect_hdmi_monitor(radeon_connector->edid)) {
- if (connector->connector_type == DRM_MODE_CONNECTOR_DVID) {
- kfree(radeon_connector->edid);
- radeon_connector->edid = NULL;
- ret = connector_status_disconnected;
- }
- } else {
- if ((connector->connector_type == DRM_MODE_CONNECTOR_HDMIA) ||
- (connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
kfree(radeon_connector->edid);
radeon_connector->edid = NULL;
ret = connector_status_disconnected;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 5f317317aba..dd279da9054 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -226,20 +226,20 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
{
u64 size_af, size_bf;
- size_af = 0xFFFFFFFF - mc->vram_end;
- size_bf = mc->vram_start;
+ size_af = ((0xFFFFFFFF - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
+ size_bf = mc->vram_start & ~mc->gtt_base_align;
if (size_bf > size_af) {
if (mc->gtt_size > size_bf) {
dev_warn(rdev->dev, "limiting GTT\n");
mc->gtt_size = size_bf;
}
- mc->gtt_start = mc->vram_start - mc->gtt_size;
+ mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
} else {
if (mc->gtt_size > size_af) {
dev_warn(rdev->dev, "limiting GTT\n");
mc->gtt_size = size_af;
}
- mc->gtt_start = mc->vram_end + 1;
+ mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
}
mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
dev_info(rdev->dev, "GTT: %lluM 0x%08llX - 0x%08llX\n",
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index f2ed27c8055..03204039774 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -642,8 +642,8 @@ void radeon_legacy_tv_mode_set(struct drm_encoder *encoder,
}
flicker_removal = (tmp + 500) / 1000;
- if (flicker_removal < 2)
- flicker_removal = 2;
+ if (flicker_removal < 3)
+ flicker_removal = 3;
for (i = 0; i < ARRAY_SIZE(SLOPE_limit); ++i) {
if (flicker_removal == SLOPE_limit[i])
break;
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 9e4240b3bf0..f454c9a5e7f 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -57,7 +57,9 @@ void rs400_gart_adjust_size(struct radeon_device *rdev)
}
if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) {
/* FIXME: RS400 & RS480 seems to have issue with GART size
- * if 4G of system memory (needs more testing) */
+ * if 4G of system memory (needs more testing)
+ */
+ /* XXX is this still an issue with proper alignment? */
rdev->mc.gtt_size = 32 * 1024 * 1024;
DRM_ERROR("Forcing to 32M GART size (because of ASIC bug ?)\n");
}
@@ -263,6 +265,7 @@ void rs400_mc_init(struct radeon_device *rdev)
r100_vram_init_sizes(rdev);
base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
}
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 7bb4c3e52f3..6dc15ea8ba3 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -698,6 +698,7 @@ void rs600_mc_init(struct radeon_device *rdev)
base = G_000004_MC_FB_START(base) << 16;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = 0;
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
}
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index f4f0a61bcdc..ce4ecbe1081 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -162,6 +162,7 @@ void rs690_mc_init(struct radeon_device *rdev)
rs690_pm_info(rdev);
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
radeon_vram_location(rdev, &rdev->mc, base);
+ rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 7d9a7b0a180..0c9c169a685 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -195,6 +195,7 @@ void rv515_mc_init(struct radeon_device *rdev)
rv515_vram_get_type(rdev);
r100_vram_init_sizes(rdev);
radeon_vram_location(rdev, &rdev->mc, 0);
+ rdev->mc.gtt_base_align = 0;
if (!(rdev->flags & RADEON_IS_AGP))
radeon_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b1d67dc973d..d233c65f3f7 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -40,7 +40,9 @@
#include <linux/slab.h>
#include <asm/atomic.h>
+#ifdef TTM_HAS_AGP
#include <asm/agp.h>
+#endif
#include "ttm/ttm_bo_driver.h"
#include "ttm/ttm_page_alloc.h"
@@ -392,7 +394,7 @@ static int ttm_pool_get_num_unused_pages(void)
/**
* Callback for mm to request pool to reduce number of page held.
*/
-static int ttm_pool_mm_shrink(int shrink_pages, gfp_t gfp_mask)
+static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask)
{
static atomic_t start_pool = ATOMIC_INIT(0);
unsigned i;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index f1d62611241..437ac786277 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -972,6 +972,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
ret = copy_from_user(rects, user_rects, rects_size);
if (unlikely(ret != 0)) {
DRM_ERROR("Failed to get rects.\n");
+ ret = -EFAULT;
goto out_free;
}
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
index 9bec24db4d4..2d44b330010 100644
--- a/drivers/misc/cs5535-mfgpt.c
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -366,6 +366,6 @@ static int __init cs5535_mfgpt_init(void)
module_init(cs5535_mfgpt_init);
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index af217924a76..ad30f074ee1 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -365,6 +365,26 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
static int __devexit sdhci_s3c_remove(struct platform_device *pdev)
{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_s3c *sc = sdhci_priv(host);
+ int ptr;
+
+ sdhci_remove_host(host, 1);
+
+ for (ptr = 0; ptr < 3; ptr++) {
+ clk_disable(sc->clk_bus[ptr]);
+ clk_put(sc->clk_bus[ptr]);
+ }
+ clk_disable(sc->clk_io);
+ clk_put(sc->clk_io);
+
+ iounmap(host->ioaddr);
+ release_resource(sc->ioarea);
+ kfree(sc->ioarea);
+
+ sdhci_free_host(host);
+ platform_set_drvdata(pdev, NULL);
+
return 0;
}
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 7acb3edc47e..2602852cc55 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -677,7 +677,7 @@ static int ibmveth_close(struct net_device *netdev)
if (!adapter->pool_config)
netif_stop_queue(netdev);
- free_irq(netdev->irq, netdev);
+ h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
do {
lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
@@ -689,6 +689,8 @@ static int ibmveth_close(struct net_device *netdev)
lpar_rc);
}
+ free_irq(netdev->irq, netdev);
+
adapter->rx_no_buffer = *(u64*)(((char*)adapter->buffer_list_addr) + 4096 - 8);
ibmveth_cleanup(adapter);
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 5b3dfb4ab27..33525bf2a3d 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1168,6 +1168,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
int interrupts, nr_serviced = 0, i;
struct ei_device *ei_local;
int handled = 0;
+ unsigned long flags;
e8390_base = dev->base_addr;
ei_local = netdev_priv(dev);
@@ -1176,7 +1177,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
* Protect the irq test too.
*/
- spin_lock(&ei_local->page_lock);
+ spin_lock_irqsave(&ei_local->page_lock, flags);
if (ei_local->irqlock)
{
@@ -1188,7 +1189,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
dev->name, inb_p(e8390_base + EN0_ISR),
inb_p(e8390_base + EN0_IMR));
#endif
- spin_unlock(&ei_local->page_lock);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
return IRQ_NONE;
}
@@ -1261,7 +1262,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
ei_local->irqlock = 0;
outb_p(ENISR_ALL, e8390_base + EN0_IMR);
- spin_unlock(&ei_local->page_lock);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
return IRQ_RETVAL(handled);
}
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 96b6cfbf0a3..cdc6a5c2e70 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1316,7 +1316,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
{ 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 },
/* 8168C family. */
- { 0x7cf00000, 0x3ca00000, RTL_GIGA_MAC_VER_24 },
+ { 0x7cf00000, 0x3cb00000, RTL_GIGA_MAC_VER_24 },
{ 0x7cf00000, 0x3c900000, RTL_GIGA_MAC_VER_23 },
{ 0x7cf00000, 0x3c800000, RTL_GIGA_MAC_VER_18 },
{ 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_24 },
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 77b359162d6..23c15aa9fbd 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -730,13 +730,17 @@ static int ath9k_hif_usb_alloc_urbs(struct hif_device_usb *hif_dev)
/* RX */
if (ath9k_hif_usb_alloc_rx_urbs(hif_dev) < 0)
- goto err;
+ goto err_rx;
/* Register Read */
if (ath9k_hif_usb_alloc_reg_in_urb(hif_dev) < 0)
- goto err;
+ goto err_reg;
return 0;
+err_reg:
+ ath9k_hif_usb_dealloc_rx_urbs(hif_dev);
+err_rx:
+ ath9k_hif_usb_dealloc_tx_urbs(hif_dev);
err:
return -ENOMEM;
}
diff --git a/drivers/net/wireless/hostap/hostap_pci.c b/drivers/net/wireless/hostap/hostap_pci.c
index d24dc7dc072..972a9c3af39 100644
--- a/drivers/net/wireless/hostap/hostap_pci.c
+++ b/drivers/net/wireless/hostap/hostap_pci.c
@@ -330,6 +330,7 @@ static int prism2_pci_probe(struct pci_dev *pdev,
dev->irq = pdev->irq;
hw_priv->mem_start = mem;
+ dev->base_addr = (unsigned long) mem;
prism2_pci_cor_sreset(local);
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.h b/drivers/net/wireless/iwlwifi/iwl-sta.h
index c2a453a1a99..dc43ebd1f1f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.h
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.h
@@ -97,6 +97,17 @@ static inline void iwl_clear_driver_stations(struct iwl_priv *priv)
spin_lock_irqsave(&priv->sta_lock, flags);
memset(priv->stations, 0, sizeof(priv->stations));
priv->num_stations = 0;
+
+ /*
+ * Remove all key information that is not stored as part of station
+ * information since mac80211 may not have had a
+ * chance to remove all the keys. When device is reconfigured by
+ * mac80211 after an error all keys will be reconfigured.
+ */
+ priv->ucode_key_table = 0;
+ priv->key_mapping_key = 0;
+ memset(priv->wep_keys, 0, sizeof(priv->wep_keys));
+
spin_unlock_irqrestore(&priv->sta_lock, flags);
}
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 3ae468c4d76..f20d3eeeea7 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -854,6 +854,11 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
BIT(NL80211_IFTYPE_WDS);
/*
+ * Initialize configuration work.
+ */
+ INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled);
+
+ /*
* Let the driver probe the device to detect the capabilities.
*/
retval = rt2x00dev->ops->lib->probe_hw(rt2x00dev);
@@ -863,11 +868,6 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
}
/*
- * Initialize configuration work.
- */
- INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled);
-
- /*
* Allocate queue array.
*/
retval = rt2x00queue_allocate(rt2x00dev);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 92379e2d37e..2aaa13150de 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -156,6 +156,38 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
pcibios_align_resource, dev);
}
+ if (ret < 0 && dev->fw_addr[resno]) {
+ struct resource *root, *conflict;
+ resource_size_t start, end;
+
+ /*
+ * If we failed to assign anything, let's try the address
+ * where firmware left it. That at least has a chance of
+ * working, which is better than just leaving it disabled.
+ */
+
+ if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
+ else
+ root = &iomem_resource;
+
+ start = res->start;
+ end = res->end;
+ res->start = dev->fw_addr[resno];
+ res->end = res->start + size - 1;
+ dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
+ resno, res);
+ conflict = request_resource_conflict(root, res);
+ if (conflict) {
+ dev_info(&dev->dev,
+ "BAR %d: %pR conflicts with %s %pR\n", resno,
+ res, conflict->name, conflict);
+ res->start = start;
+ res->end = end;
+ } else
+ ret = 0;
+ }
+
if (!ret) {
res->flags &= ~IORESOURCE_STARTALIGN;
dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 29f91fac1df..a4cd9adfcbc 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -857,8 +857,10 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev)
{
pcmcia_release_configuration(p_dev);
pcmcia_release_io(p_dev, &p_dev->io);
- if (p_dev->_irq)
+ if (p_dev->_irq) {
free_irq(p_dev->irq, p_dev->priv);
+ p_dev->_irq = 0;
+ }
if (p_dev->win)
pcmcia_release_window(p_dev, p_dev->win);
}
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 40658e3385b..bb2f1fba637 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -489,7 +489,7 @@ int intel_scu_ipc_simple_command(int cmd, int sub)
mutex_unlock(&ipclock);
return -ENODEV;
}
- ipc_command(cmd << 12 | sub);
+ ipc_command(sub << 12 | cmd);
err = busy_loop();
mutex_unlock(&ipclock);
return err;
@@ -501,9 +501,9 @@ EXPORT_SYMBOL(intel_scu_ipc_simple_command);
* @cmd: command
* @sub: sub type
* @in: input data
- * @inlen: input length
+ * @inlen: input length in dwords
* @out: output data
- * @outlein: output length
+ * @outlein: output length in dwords
*
* Issue a command to the SCU which involves data transfers. Do the
* data copies under the lock but leave it for the caller to interpret
@@ -524,7 +524,7 @@ int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
for (i = 0; i < inlen; i++)
ipc_data_writel(*in++, 4 * i);
- ipc_command((cmd << 12) | sub | (inlen << 18));
+ ipc_command((sub << 12) | cmd | (inlen << 18));
err = busy_loop();
for (i = 0; i < outlen; i++)
@@ -556,6 +556,10 @@ int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data)
u32 cmd = 0;
mutex_lock(&ipclock);
+ if (ipcdev.pdev == NULL) {
+ mutex_unlock(&ipclock);
+ return -ENODEV;
+ }
cmd = (addr >> 24) & 0xFF;
if (cmd == IPC_I2C_READ) {
writel(addr, ipcdev.i2c_base + IPC_I2C_CNTRL_ADDR);
diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c
index d762a0cbc6a..2afbeec8b79 100644
--- a/drivers/power/ds2782_battery.c
+++ b/drivers/power/ds2782_battery.c
@@ -163,7 +163,7 @@ static int ds2782_get_capacity(struct ds278x_info *info, int *capacity)
if (err)
return err;
*capacity = raw;
- return raw;
+ return 0;
}
static int ds2786_get_current(struct ds278x_info *info, int *current_uA)
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 34d51dd4c53..bed7b4634cc 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -948,8 +948,10 @@ static ssize_t dasd_alias_show(struct device *dev,
if (device->discipline && device->discipline->get_uid &&
!device->discipline->get_uid(device, &uid)) {
if (uid.type == UA_BASE_PAV_ALIAS ||
- uid.type == UA_HYPER_PAV_ALIAS)
+ uid.type == UA_HYPER_PAV_ALIAS) {
+ dasd_put_device(device);
return sprintf(buf, "1\n");
+ }
}
dasd_put_device(device);
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index ce7cb87479f..407d0e9adfa 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -713,7 +713,7 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid,
ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, chsc_resp);
if (ret)
goto out_free;
- memcpy(desc, &chsc_resp->data, chsc_resp->length);
+ memcpy(desc, &chsc_resp->data, sizeof(*desc));
out_free:
kfree(chsc_resp);
return ret;
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index d53e62ab09d..aacbe14e2e7 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -554,7 +554,7 @@ static int opiocgetnext(unsigned int cmd, void __user *argp)
static int openprom_bsd_ioctl(struct file * file,
unsigned int cmd, unsigned long arg)
{
- DATA *data = (DATA *) file->private_data;
+ DATA *data = file->private_data;
void __user *argp = (void __user *)arg;
int err;
@@ -601,7 +601,7 @@ static int openprom_bsd_ioctl(struct file * file,
static long openprom_ioctl(struct file * file,
unsigned int cmd, unsigned long arg)
{
- DATA *data = (DATA *) file->private_data;
+ DATA *data = file->private_data;
switch (cmd) {
case OPROMGETOPT:
diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c
index ed7d958b0a0..544f2e25d0e 100644
--- a/drivers/serial/suncore.c
+++ b/drivers/serial/suncore.c
@@ -71,7 +71,9 @@ int sunserial_console_match(struct console *con, struct device_node *dp,
con->index = line;
drv->cons = con;
- add_preferred_console(con->name, line, NULL);
+
+ if (!console_set_on_cmdline)
+ add_preferred_console(con->name, line, NULL);
return 1;
}
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 234459c2f01..ffbf4553f66 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1500,20 +1500,25 @@ out_unmap:
static int __devexit su_remove(struct of_device *op)
{
struct uart_sunsu_port *up = dev_get_drvdata(&op->dev);
+ bool kbdms = false;
if (up->su_type == SU_PORT_MS ||
- up->su_type == SU_PORT_KBD) {
+ up->su_type == SU_PORT_KBD)
+ kbdms = true;
+
+ if (kbdms) {
#ifdef CONFIG_SERIO
serio_unregister_port(&up->serio);
#endif
- kfree(up);
- } else if (up->port.type != PORT_UNKNOWN) {
+ } else if (up->port.type != PORT_UNKNOWN)
uart_remove_one_port(&sunsu_reg, &up->port);
- }
if (up->port.membase)
of_iounmap(&op->resource[0], up->port.membase, up->reg_size);
+ if (kbdms)
+ kfree(up);
+
dev_set_drvdata(&op->dev, NULL);
return 0;
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index d69eccf5f19..2aaa0f75c6c 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -136,7 +136,7 @@ struct ffs_data {
* handling setup requests immidiatelly user space may be so
* slow that another setup will be sent to the gadget but this
* time not to us but another function and then there could be
- * a race. Is taht the case? Or maybe we can use cdev->req
+ * a race. Is that the case? Or maybe we can use cdev->req
* after all, maybe we just need some spinlock for that? */
struct usb_request *ep0req; /* P: mutex */
struct completion ep0req_completion; /* P: mutex */
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 57a593c58cf..d219070fed3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -177,8 +177,8 @@ static void handle_tx(struct vhost_net *net)
break;
}
if (err != len)
- pr_err("Truncated TX packet: "
- " len %d != %zd\n", err, len);
+ pr_debug("Truncated TX packet: "
+ " len %d != %zd\n", err, len);
vhost_add_used_and_signal(&net->dev, vq, head, 0);
total_len += len;
if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
@@ -275,8 +275,8 @@ static void handle_rx(struct vhost_net *net)
}
/* TODO: Should check and handle checksum. */
if (err > len) {
- pr_err("Discarded truncated rx packet: "
- " len %d > %zd\n", err, len);
+ pr_debug("Discarded truncated rx packet: "
+ " len %d > %zd\n", err, len);
vhost_discard_vq_desc(vq);
continue;
}
@@ -534,11 +534,16 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
rcu_assign_pointer(vq->private_data, sock);
vhost_net_enable_vq(n, vq);
done:
+ mutex_unlock(&vq->mutex);
+
if (oldsock) {
vhost_net_flush_vq(n, index);
fput(oldsock->file);
}
+ mutex_unlock(&n->dev.mutex);
+ return 0;
+
err_vq:
mutex_unlock(&vq->mutex);
err:
diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c
index 515cf1978d1..c4e17642d9c 100644
--- a/drivers/video/aty/radeon_pm.c
+++ b/drivers/video/aty/radeon_pm.c
@@ -2872,7 +2872,7 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk, int ignore_devlis
}
#if 0
- /* Power down TV DAC, taht saves a significant amount of power,
+ /* Power down TV DAC, that saves a significant amount of power,
* we'll have something better once we actually have some TVOut
* support
*/
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ *
+ * returns 0 if our slot ended up at the start or end of the leaf, if the
+ * leaf now has at least data_size bytes free, or if either push succeeded.
+ * returns 1 if neither push made progress and < 0 if a push hit an error.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf. min_data_size is 1, empty is 0 and min_slot is our
+ * slot, so nothing below our slot is pushed.
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ /* the push may already have freed enough room for the new item */
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /*
+ * try to push all the items before our slot into the left leaf.
+ * The slot is re-read from the path first, presumably because the
+ * push to the right can change it. max_slot is our slot, so the
+ * item at our slot is not touched.
+ */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ /* report success if at least one of the two pushes succeeded */
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b133..9254b3d58db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 3fe49042d8a..6d44053ecff 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
kfree(ac->private);
ac->private = NULL;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3ab79f6c4ce..416c08d315d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
+ /* make note of release offset, in case we need to replay */
+ req->r_request_release_offset = p - msg->front.iov_base;
+
/* cap releases */
releases = 0;
if (req->r_inode_drop)
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+ if (req->r_got_unsafe) {
+ /*
+ * Replay. Do not regenerate message (and rebuild
+ * paths, etc.); just use the original message.
+ * Rebuilding paths will break for renames because
+ * d_move mangles the src name.
+ */
+ msg = req->r_request;
+ rhead = msg->front.iov_base;
+
+ flags = le32_to_cpu(rhead->flags);
+ flags |= CEPH_MDS_FLAG_REPLAY;
+ rhead->flags = cpu_to_le32(flags);
+
+ if (req->r_target_inode)
+ rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+ rhead->num_retry = req->r_attempts - 1;
+
+ /* remove cap/dentry releases from message */
+ rhead->num_releases = 0;
+ msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+ msg->front.iov_len = req->r_request_release_offset;
+ return 0;
+ }
+
if (req->r_request) {
ceph_msg_put(req->r_request);
req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
+ rhead->ino = 0;
dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
- if (req->r_target_inode && req->r_got_unsafe)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- else
- rhead->ino = 0;
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index b292fa42a66..952410c60d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
int r_err;
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ad43a310a4..15167b2daa5 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
* nicely render a sockaddr as a string.
*/
#define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
int i;
char *s;
struct sockaddr_in *in4 = (void *)ss;
- unsigned char *quad = (void *)&in4->sin_addr.s_addr;
struct sockaddr_in6 *in6 = (void *)ss;
spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
switch (ss->ss_family) {
case AF_INET:
- sprintf(s, "%u.%u.%u.%u:%u",
- (unsigned int)quad[0],
- (unsigned int)quad[1],
- (unsigned int)quad[2],
- (unsigned int)quad[3],
- (unsigned int)ntohs(in4->sin_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+ (unsigned int)ntohs(in4->sin_port));
break;
case AF_INET6:
- sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
- in6->sin6_addr.s6_addr16[0],
- in6->sin6_addr.s6_addr16[1],
- in6->sin6_addr.s6_addr16[2],
- in6->sin6_addr.s6_addr16[3],
- in6->sin6_addr.s6_addr16[4],
- in6->sin6_addr.s6_addr16[5],
- in6->sin6_addr.s6_addr16[6],
- in6->sin6_addr.s6_addr16[7],
- (unsigned int)ntohs(in6->sin6_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+ (unsigned int)ntohs(in6->sin6_port));
break;
default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
*/
static struct socket *ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
if (ret)
return ERR_PTR(ret);
con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
- ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
- ',', &ipend)) {
+ delim, &ipend))
ss->ss_family = AF_INET;
- } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
- ',', &ipend)) {
+ else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+ delim, &ipend))
ss->ss_family = AF_INET6;
- } else {
+ else
goto bad;
- }
p = ipend;
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
/* port? */
if (p < end && *p == ':') {
port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
return 0;
bad:
- pr_err("parse_ips bad ip '%s'\n", c);
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
@@ -2015,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p\n", con, msg);
+ dout("con_revoke %p msg %p - was on queue\n", con, msg);
list_del_init(&msg->list_head);
ceph_msg_put(msg);
msg->hdr.seq = 0;
- if (con->out_msg == msg) {
- ceph_msg_put(con->out_msg);
- con->out_msg = NULL;
- }
+ }
+ if (con->out_msg == msg) {
+ dout("con_revoke %p msg %p - was sending\n", con, msg);
+ con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- } else {
- dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
}
mutex_unlock(&con->mutex);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 50ce64ebd33..277f8b33957 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION);
+ kfree(pi);
goto bad;
}
__decode_pool(p, pi);
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba0782..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dbab3fdc258..0898f3ec821 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
int may_demote;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b256d6f2428..8f02d3db8f4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff593276..036880895bf 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
- struct jbd2_buffer_trigger_type *triggers;
journal_t *journal = transaction->t_journal;
/*
@@ -328,21 +327,21 @@ repeat:
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
- triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
- triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
- * Fire any commit trigger. Do this before checking for escaping,
- * as the trigger may modify the magic offset. If a copy-out
- * happens afterwards, it will have the correct data in the buffer.
+ * Fire data frozen trigger if data already wasn't frozen. Do this
+ * before checking for escaping, as the trigger may modify the magic
+ * offset. If a copy-out happens afterwards, it will have the correct
+ * data in the buffer.
*/
- jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
- triggers);
+ if (!done_copy_out)
+ jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+ jh_in->b_triggers);
/*
* Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620a..b8e0806681b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
page = jh2bh(jh)->b_page;
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
source = kmap_atomic(page, KM_USER0);
+ /* Fire data frozen trigger just before we copy the data */
+ jbd2_buffer_frozen_trigger(jh, source + offset,
+ jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
jh->b_triggers = type;
}
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
- if (!triggers || !triggers->t_commit)
+ if (!triggers || !triggers->t_frozen)
return;
- triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+ triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index a2d58c96f1b..d258e261bdc 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
{
- /* success of check_xattr_ref_inode() means taht inode (ic) dose not have
+ /* success of check_xattr_ref_inode() means that inode (ic) does not have
* duplicate name/value pairs. If duplicate name/value pair would be found,
* one will be removed.
*/
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
* This function is called by the kernel memory management when memory
* gets low.
*
+ * @shrink: (ignored)
* @nr_to_scan: Number of objects to scan
* @gfp_mask: (ignored)
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free_list);
struct list_head *l, *ltmp;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..e60416d3f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
struct nfs_inode *nfsi;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386..e70f44b9b3f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
/* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+ int nr_to_scan, gfp_t gfp_mask);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc1..356e976772b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
dump_stack();
goto bail;
}
-
- past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
- mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
- (unsigned long long)past_eof);
-
- if (create && (iblock >= past_eof))
- set_buffer_new(bh_result);
}
+ past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
+ mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+ (unsigned long long)past_eof);
+ if (create && (iblock >= past_eof))
+ set_buffer_new(bh_result);
+
bail:
if (err < 0)
err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
return ret;
}
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
- struct page *page,
- unsigned from,
- unsigned to)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- handle_t *handle;
- int ret = 0;
-
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out;
- }
-
- if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
- if (ret < 0)
- mlog_errno(ret);
- }
-out:
- if (ret) {
- if (!IS_ERR(handle))
- ocfs2_commit_trans(osb, handle);
- handle = ERR_PTR(ret);
- }
- return handle;
-}
-
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
sector_t status;
@@ -1131,23 +1100,37 @@ out:
*/
static int ocfs2_grab_pages_for_write(struct address_space *mapping,
struct ocfs2_write_ctxt *wc,
- u32 cpos, loff_t user_pos, int new,
+ u32 cpos, loff_t user_pos,
+ unsigned user_len, int new,
struct page *mmap_page)
{
int ret = 0, i;
- unsigned long start, target_index, index;
+ unsigned long start, target_index, end_index, index;
struct inode *inode = mapping->host;
+ loff_t last_byte;
target_index = user_pos >> PAGE_CACHE_SHIFT;
/*
* Figure out how many pages we'll be manipulating here. For
* non allocating write, we just change the one
- * page. Otherwise, we'll need a whole clusters worth.
+ * page. Otherwise, we'll need a whole cluster's worth. If we're
+ * writing past i_size, we only need enough pages to cover the
+ * last page of the write.
*/
if (new) {
wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+ /*
+ * We need the index *past* the last page we could possibly
+ * touch. This is the page past the end of the write or
+ * i_size, whichever is greater.
+ */
+ last_byte = max(user_pos + user_len, i_size_read(inode));
+ BUG_ON(last_byte < 1);
+ end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+ if ((start + wc->w_num_pages) > end_index)
+ wc->w_num_pages = end_index - start;
} else {
wc->w_num_pages = 1;
start = target_index;
@@ -1620,21 +1603,20 @@ out:
* write path can treat it as an non-allocating write, which has no
* special case code for sparse/nonsparse files.
*/
-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
- unsigned len,
+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
+ struct buffer_head *di_bh,
+ loff_t pos, unsigned len,
struct ocfs2_write_ctxt *wc)
{
int ret;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
loff_t newsize = pos + len;
- if (ocfs2_sparse_alloc(osb))
- return 0;
+ BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
if (newsize <= i_size_read(inode))
return 0;
- ret = ocfs2_extend_no_holes(inode, newsize, pos);
+ ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
if (ret)
mlog_errno(ret);
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
return ret;
}
+/*
+ * Zero the region between the current i_size and pos when a write starts
+ * past i_size. Must only be called on sparse-alloc filesystems (enforced
+ * with BUG_ON). Returns 0 when pos is not past i_size, otherwise whatever
+ * ocfs2_zero_extend() returns.
+ */
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+			   loff_t pos)
+{
+	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+
+	/* Nothing to zero unless the write begins beyond i_size. */
+	if (pos <= i_size_read(inode))
+		return 0;
+
+	return ocfs2_zero_extend(inode, di_bh, pos);
+}
+
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
}
- ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
+ if (ocfs2_sparse_alloc(osb))
+ ret = ocfs2_zero_tail(inode, di_bh, pos);
+ else
+ ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
+ wc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* that we can zero and flush if we error after adding the
* extent.
*/
- ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
+ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e174..153abb5abef 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
struct dlm_ctxt *dlm = NULL;
struct dlm_ctxt *new_ctxt = NULL;
- if (strlen(domain) > O2NM_MAX_NAME_LEN) {
+ if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
ret = -ENAMETOOLONG;
mlog(ML_ERROR, "domain name length too long\n");
goto leave;
@@ -1709,6 +1709,7 @@ retry:
}
if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+ spin_unlock(&dlm_domain_lock);
mlog(ML_ERROR,
"Requested locking protocol version is not "
"compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e31..94b97fc6a88 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
mlog(0, "trying again...\n");
goto again;
}
- /* now that we are sure the MIGRATING state is there, drop
- * the unneded state which blocked threads trying to DIRTY */
- spin_lock(&res->spinlock);
- BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
- BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
- res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
- spin_unlock(&res->spinlock);
+ ret = 0;
/* did the target go down or die? */
spin_lock(&dlm->spinlock);
if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
spin_unlock(&dlm->spinlock);
/*
+ * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
+ * another try; otherwise, we are sure the MIGRATING state is there,
+ * drop the unneeded state which blocked threads trying to DIRTY
+ */
+ spin_lock(&res->spinlock);
+ BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+ res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+ if (!ret)
+ BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+ spin_unlock(&res->spinlock);
+
+ /*
* at this point:
*
- * o the DLM_LOCK_RES_MIGRATING flag is set
+ * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
* o there are no pending asts on this lockres
* o all processes trying to reserve an ast on this
* lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be7..9dfaac73b36 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
+ bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c44..2b10b36d157 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
return status;
}
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ *
+ * Returns NULL when the inode's data does not need ordering (no
+ * transaction is started), a started handle on success, or an ERR_PTR()
+ * carrying the error from ocfs2_start_trans() or ocfs2_jbd2_file_inode().
+ * If filing the inode fails, the transaction that was started is committed
+ * here before the error is returned.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	handle_t *handle = NULL;
+	int ret = 0;
+
+	/* No ordering needed: fall through and return the NULL handle. */
+	if (!ocfs2_should_order_data(inode))
+		goto out;
+
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		/*
+		 * Propagate the real failure reason rather than assuming
+		 * every failure to start a transaction is -ENOMEM.
+		 */
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_jbd2_file_inode(handle, inode);
+	if (ret < 0)
+		mlog_errno(ret);
+
+out:
+	if (ret) {
+		/* Only a successfully started handle can be committed. */
+		if (!IS_ERR(handle))
+			ocfs2_commit_trans(osb, handle);
+		handle = ERR_PTR(ret);
+	}
+	return handle;
+}
+
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
- u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+ u64 abs_to)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- unsigned long index;
- unsigned int offset;
+ unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
handle_t *handle = NULL;
- int ret;
+ int ret = 0;
+ unsigned zero_from, zero_to, block_start, block_end;
- offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
- /* ugh. in prepare/commit_write, if from==to==start of block, we
- ** skip the prepare. make sure we never send an offset for the start
- ** of a block
- */
- if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
- offset++;
- }
- index = size >> PAGE_CACHE_SHIFT;
+ BUG_ON(abs_from >= abs_to);
+ BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+ BUG_ON(abs_from & (inode->i_blkbits - 1));
page = grab_cache_page(mapping, index);
if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
goto out;
}
- ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_unlock;
- }
+ /* Get the offsets within the page that we want to zero */
+ zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+ zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+ if (!zero_to)
+ zero_to = PAGE_CACHE_SIZE;
- if (ocfs2_should_order_data(inode)) {
- handle = ocfs2_start_walk_page_trans(inode, page, offset,
- offset);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ mlog(0,
+ "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+ (unsigned long long)abs_from, (unsigned long long)abs_to,
+ index, zero_from, zero_to);
+
+ /* We know that zero_from is block aligned */
+ for (block_start = zero_from; block_start < zero_to;
+ block_start = block_end) {
+ block_end = block_start + (1 << inode->i_blkbits);
+
+ /*
+ * block_start is block-aligned. Bump it by one to
+ * force ocfs2_{prepare,commit}_write() to zero the
+ * whole block.
+ */
+ ret = ocfs2_prepare_write_nolock(inode, page,
+ block_start + 1,
+ block_start + 1);
+ if (ret < 0) {
+ mlog_errno(ret);
goto out_unlock;
}
- }
- /* must not update i_size! */
- ret = block_commit_write(page, offset, offset);
- if (ret < 0)
- mlog_errno(ret);
- else
- ret = 0;
+ if (!handle) {
+ handle = ocfs2_zero_start_ordered_transaction(inode);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ break;
+ }
+ }
+
+ /* must not update i_size! */
+ ret = block_commit_write(page, block_start + 1,
+ block_start + 1);
+ if (ret < 0)
+ mlog_errno(ret);
+ else
+ ret = 0;
+ }
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
out_unlock:
unlock_page(page);
page_cache_release(page);
@@ -786,22 +838,114 @@ out:
return ret;
}
-static int ocfs2_zero_extend(struct inode *inode,
- u64 zero_to_size)
+/*
+ * Find the next range to zero. We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache. We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
+ * needs to be zeroed. range_start and range_end return the next zeroing
+ * range. A subsequent call should pass the previous range_end as its
+ * zero_start. If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over. Refcounted extents are CoWd.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 zero_start, u64 zero_end,
+ u64 *range_start, u64 *range_end)
{
- int ret = 0;
- u64 start_off;
- struct super_block *sb = inode->i_sb;
+ int rc = 0, needs_cow = 0;
+ u32 p_cpos, zero_clusters = 0;
+ u32 zero_cpos =
+ zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+ u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
- start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
- while (start_off < zero_to_size) {
- ret = ocfs2_write_zero_page(inode, start_off);
- if (ret < 0) {
- mlog_errno(ret);
+ while (zero_cpos < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ zero_clusters = num_clusters;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ break;
+ }
+
+ zero_cpos += num_clusters;
+ }
+ if (!zero_clusters) {
+ *range_end = 0;
+ goto out;
+ }
+
+ while ((zero_cpos + zero_clusters) < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+ &p_cpos, &num_clusters,
+ &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
goto out;
}
- start_off += sb->s_blocksize;
+ if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+ break;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ zero_clusters += num_clusters;
+ }
+ if ((zero_cpos + zero_clusters) > last_cpos)
+ zero_clusters = last_cpos - zero_cpos;
+
+ if (needs_cow) {
+ rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+ UINT_MAX);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+ }
+
+ *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+ *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+ zero_cpos + zero_clusters);
+
+out:
+ return rc;
+}
+
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
+ * has made sure that the entire range needs zeroing.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+ u64 range_end)
+{
+ int rc = 0;
+ u64 next_pos;
+ u64 zero_pos = range_start;
+
+ mlog(0, "range_start = %llu, range_end = %llu\n",
+ (unsigned long long)range_start,
+ (unsigned long long)range_end);
+ BUG_ON(range_start >= range_end);
+
+ while (zero_pos < range_end) {
+ next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+ if (next_pos > range_end)
+ next_pos = range_end;
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+ if (rc < 0) {
+ mlog_errno(rc);
+ break;
+ }
+ zero_pos = next_pos;
/*
* Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
cond_resched();
}
-out:
+ return rc;
+}
+
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to_size)
+{
+ int ret = 0;
+ u64 zero_start, range_start = 0, range_end = 0;
+ struct super_block *sb = inode->i_sb;
+
+ zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+ mlog(0, "zero_start %llu for i_size %llu\n",
+ (unsigned long long)zero_start,
+ (unsigned long long)i_size_read(inode));
+ while (zero_start < zero_to_size) {
+ ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+ zero_to_size,
+ &range_start,
+ &range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ if (!range_end)
+ break;
+ /* Trim the ends */
+ if (range_start < zero_start)
+ range_start = zero_start;
+ if (range_end > zero_to_size)
+ range_end = zero_to_size;
+
+ ret = ocfs2_zero_extend_range(inode, range_start,
+ range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ zero_start = range_end;
+ }
+
return ret;
}
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to)
{
int ret;
u32 clusters_to_add;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ /*
+ * Only quota files call this without a bh, and they can't be
+ * refcounted.
+ */
+ BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
+
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
if (clusters_to_add < oi->ip_clusters)
clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
* still need to zero the area between the old i_size and the
* new i_size.
*/
- ret = ocfs2_zero_extend(inode, zero_to);
+ ret = ocfs2_zero_extend(inode, di_bh, zero_to);
if (ret < 0)
mlog_errno(ret);
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
goto out;
if (i_size_read(inode) == new_i_size)
- goto out;
+ goto out;
BUG_ON(new_i_size < i_size_read(inode));
/*
- * Fall through for converting inline data, even if the fs
- * supports sparse files.
- *
- * The check for inline data here is legal - nobody can add
- * the feature since we have i_mutex. We must check it again
- * after acquiring ip_alloc_sem though, as paths like mmap
- * might have raced us to converting the inode to extents.
- */
- if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- goto out_update_size;
-
- /*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
- * here.
+ * here. We even have to hold it for sparse files because there
+ * might be some tail zeroing.
*/
down_write(&oi->ip_alloc_sem);
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
-
mlog_errno(ret);
goto out;
}
}
- if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+ new_i_size);
up_write(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70..97bf761c9e7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
- u64 zero_to);
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to);
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf1641..625de9d7088 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Quota blocks have their own trigger because the struct ocfs2_block_check
* offset depends on the blocksize.
*/
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Directory blocks also have their own trigger because the
* struct ocfs2_block_check offset depends on the blocksize.
*/
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
static struct ocfs2_triggers di_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
static struct ocfs2_triggers eb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
static struct ocfs2_triggers rb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
static struct ocfs2_triggers gd_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
static struct ocfs2_triggers db_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_db_commit_trigger,
+ .t_frozen = ocfs2_db_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers xb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
static struct ocfs2_triggers dq_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_dq_commit_trigger,
+ .t_frozen = ocfs2_dq_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
mutex_lock(&os->os_lock);
ocfs2_queue_orphan_scan(osb);
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
- schedule_delayed_work(&os->os_orphan_scan_work,
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
mutex_unlock(&os->os_lock);
}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
- schedule_delayed_work(&os->os_orphan_scan_work,
- ocfs2_orphan_scan_timeout());
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+ ocfs2_orphan_scan_timeout());
}
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc..ec6adbf8f55 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
{
unsigned int la_mb;
unsigned int gd_mb;
+ unsigned int la_max_mb;
unsigned int megs_per_slot;
struct super_block *sb = osb->sb;
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
if (megs_per_slot < la_mb)
la_mb = megs_per_slot;
+ /* We can't store more bits than fit in a block. */
+ la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
+ ocfs2_local_alloc_size(sb) * 8);
+ if (la_mb > la_max_mb)
+ la_mb = la_max_mb;
+
return la_mb;
}
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe0051..4607923eb24 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
* locking allocators ranks above a transaction start
*/
WARN_ON(journal_current_handle());
- status = ocfs2_extend_no_holes(gqinode,
+ status = ocfs2_extend_no_holes(gqinode, NULL,
gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
gqinode->i_size);
if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184..dc78764ccc4 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + 2 * sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
return ocfs2_local_quota_add_chunk(sb, type, offset);
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f651..3ac5aa733e9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+ /*
+ * We only duplicate pages until we reach the page that contains i_size - 1.
+ * So trim 'end' to i_size.
+ */
+ if (end > i_size_read(context->inode))
+ end = i_size_read(context->inode);
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *new_bh = NULL;
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = filemap_fdatawrite(inode->i_mapping);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4..a8e6a95a353 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
le16_to_cpu(bg->bg_free_bits_count));
le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
le16_to_cpu(bg->bg_bits));
- cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
+ cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
le16_add_cpu(&cl->cl_next_free_rec, 1);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cf..d03469f6180 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
struct ocfs2_xattr_value_buf *vb,
struct ocfs2_xattr_set_ctxt *ctxt)
{
- int status = 0;
+ int status = 0, credits;
handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
- status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ while (clusters_to_add) {
+ status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ break;
+ }
- prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
- status = ocfs2_add_clusters_in_btree(handle,
- &et,
- &logical_start,
- clusters_to_add,
- 0,
- ctxt->data_ac,
- ctxt->meta_ac,
- &why);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
+ status = ocfs2_add_clusters_in_btree(handle,
+ &et,
+ &logical_start,
+ clusters_to_add,
+ 0,
+ ctxt->data_ac,
+ ctxt->meta_ac,
+ &why);
+ if ((status < 0) && (status != -EAGAIN)) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ break;
+ }
- ocfs2_journal_dirty(handle, vb->vb_bh);
+ ocfs2_journal_dirty(handle, vb->vb_bh);
- clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
+ clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
+ prev_clusters;
- /*
- * We should have already allocated enough space before the transaction,
- * so no need to restart.
- */
- BUG_ON(why != RESTART_NONE || clusters_to_add);
-
-leave:
+ if (why != RESTART_NONE && clusters_to_add) {
+ /*
+ * We can only fail in case the alloc file doesn't give
+ * up enough clusters.
+ */
+ BUG_ON(why == RESTART_META);
+
+ mlog(0, "restarting xattr value extension for %u"
+ " clusters,.\n", clusters_to_add);
+ credits = ocfs2_calc_extend_credits(inode->i_sb,
+ &vb->vb_xv->xr_list,
+ clusters_to_add);
+ status = ocfs2_extend_trans(handle, credits);
+ if (status < 0) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ break;
+ }
+ }
+ }
return status;
}
@@ -6788,16 +6804,15 @@ out:
return ret;
}
-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
u64 blkno, u64 new_blkno, u32 clusters,
+ u32 *cpos, int num_buckets,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_reflink_xattr_tree_args *args)
{
int i, j, ret = 0;
struct super_block *sb = args->reflink->old_inode->i_sb;
- u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
- u32 num_buckets = clusters * bpc;
int bpb = args->old_bucket->bu_blocks;
struct ocfs2_xattr_value_buf vb = {
.vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
break;
}
- /*
- * The real bucket num in this series of blocks is stored
- * in the 1st bucket.
- */
- if (i == 0)
- num_buckets = le16_to_cpu(
- bucket_xh(args->old_bucket)->xh_num_buckets);
-
ret = ocfs2_xattr_bucket_journal_access(handle,
args->new_bucket,
OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
bucket_block(args->old_bucket, j),
sb->s_blocksize);
+ /*
+ * Record the start cpos so that we can use it to initialize
+ * our xattr tree. We also set xh_num_buckets for the new
+ * bucket.
+ */
+ if (i == 0) {
+ *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
+ xh_entries[0].xe_name_hash);
+ bucket_xh(args->new_bucket)->xh_num_buckets =
+ cpu_to_le16(num_buckets);
+ }
+
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
}
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
ocfs2_xattr_bucket_relse(args->old_bucket);
ocfs2_xattr_bucket_relse(args->new_bucket);
}
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
ocfs2_xattr_bucket_relse(args->new_bucket);
return ret;
}
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+ struct inode *inode,
+ struct ocfs2_reflink_xattr_tree_args *args,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac,
+ u64 blkno, u32 cpos, u32 len)
+{
+ int ret, first_inserted = 0;
+ u32 p_cluster, num_clusters, reflink_cpos = 0;
+ u64 new_blkno;
+ unsigned int num_buckets, reflink_buckets;
+ unsigned int bpc =
+ ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
+
+ ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
+ ocfs2_xattr_bucket_relse(args->old_bucket);
+
+ while (len && num_buckets) {
+ ret = ocfs2_claim_clusters(handle, data_ac,
+ 1, &p_cluster, &num_clusters);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
+ reflink_buckets = min(num_buckets, bpc * num_clusters);
+
+ ret = ocfs2_reflink_xattr_bucket(handle, blkno,
+ new_blkno, num_clusters,
+ &reflink_cpos, reflink_buckets,
+ meta_ac, data_ac, args);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * For the 1st allocated cluster, we make it use the same cpos
+ * so that the xattr tree looks the same as the original one
+ * in most cases.
+ */
+ if (!first_inserted) {
+ reflink_cpos = cpos;
+ first_inserted = 1;
+ }
+ ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
+ num_clusters, 0, meta_ac);
+ if (ret)
+ mlog_errno(ret);
+
+ mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+ (unsigned long long)new_blkno, num_clusters, reflink_cpos);
+
+ len -= num_clusters;
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
+ num_buckets -= reflink_buckets;
+ }
+out:
+ return ret;
+}
+
/*
* Create the same xattr extent record in the new inode's xattr tree.
*/
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
void *para)
{
int ret, credits = 0;
- u32 p_cluster, num_clusters;
- u64 new_blkno;
handle_t *handle;
struct ocfs2_reflink_xattr_tree_args *args =
(struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_extent_tree et;
+ mlog(0, "reflink xattr buckets %llu len %u\n",
+ (unsigned long long)blkno, len);
+
ocfs2_init_xattr_tree_extent_tree(&et,
INODE_CACHE(args->reflink->new_inode),
args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
goto out;
}
- ret = ocfs2_claim_clusters(handle, data_ac,
- len, &p_cluster, &num_clusters);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
-
- mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
- (unsigned long long)blkno, (unsigned long long)new_blkno, len);
- ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
- meta_ac, data_ac, args);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
- (unsigned long long)new_blkno, len, cpos);
- ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
- len, 0, meta_ac);
+ ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
+ meta_ac, data_ac,
+ blkno, cpos, len);
if (ret)
mlog_errno(ret);
-out_commit:
ocfs2_commit_trans(osb, handle);
out:
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967f..fc8497643fd 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
} *label;
unsigned char *data;
Sector sect;
+ sector_t labelsect;
res = 0;
blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
goto out_freeall;
/*
+ * Special case for FBA disks: label sector does not depend on
+ * blocksize.
+ */
+ if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
+ (info->cu_type == 0x3880 && info->dev_type == 0x3370))
+ labelsect = info->label_block;
+ else
+ labelsect = info->label_block * (blocksize >> 9);
+
+ /*
* Get volume label, extract name and type.
*/
- data = read_part_sector(state, info->label_block*(blocksize/512),
- &sect);
+ data = read_part_sector(state, labelsect, &sect);
if (data == NULL)
goto out_readerr;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6..437d2ca2de9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
* This is called from kswapd when we think we need some
* more memory
*/
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
spin_lock(&dq_list_lock);
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefc..0b201114a5a 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c..04310878f44 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
/* commit.c */
int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef59..2ee3f7a6016 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct shrinker xfs_buf_shake = {
.shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(0, gfp_mask);
+ xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
STATIC int
xfsbufd_wakeup(
+ struct shrinker *shrink,
int priority,
gfp_t mask)
{
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c916..80938c736c2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
- xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bcc..a51a07c3a70 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
return last_error;
}
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t *first,
+ int tag)
+{
+ struct xfs_perag *pag = NULL;
+
+ if (tag == XFS_ICI_RECLAIM_TAG) {
+ int found;
+ int ref;
+
+ spin_lock(&mp->m_perag_lock);
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, *first, 1, tag);
+ if (found <= 0) {
+ spin_unlock(&mp->m_perag_lock);
+ return NULL;
+ }
+ *first = pag->pag_agno + 1;
+ /* open coded pag reference increment */
+ ref = atomic_inc_return(&pag->pag_ref);
+ spin_unlock(&mp->m_perag_lock);
+ trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+ } else {
+ pag = xfs_perag_get(mp, *first);
+ (*first)++;
+ }
+ return pag;
+}
+
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
int exclusive,
int *nr_to_scan)
{
+ struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
int nr;
nr = nr_to_scan ? *nr_to_scan : INT_MAX;
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, ag);
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+
+ if (!pag->pag_ici_reclaimable) {
+ /* propagate the reclaim tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
pag->pag_ici_reclaimable++;
}
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
+ if (!pag->pag_ici_reclaimable) {
+ /* clear the reclaim tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
}
/*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
/*
* Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
*/
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
static int
xfs_reclaim_inode_shrink(
+ struct shrinker *shrink,
int nr_to_scan,
gfp_t gfp_mask)
{
struct xfs_mount *mp;
struct xfs_perag *pag;
xfs_agnumber_t ag;
- int reclaimable = 0;
+ int reclaimable;
+ mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
if (!(gfp_mask & __GFP_FS))
return -1;
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- if (nr_to_scan <= 0)
- break;
- }
- up_read(&xfs_mount_list_lock);
- }
+ /* if we don't exhaust the scan, don't bother coming back */
+ if (nr_to_scan > 0)
+ return -1;
+ }
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- pag = xfs_perag_get(mp, ag);
- reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
+ reclaimable = 0;
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+ XFS_ICI_RECLAIM_TAG))) {
+ reclaimable += pag->pag_ici_reclaimable;
+ xfs_perag_put(pag);
}
- up_read(&xfs_mount_list_lock);
return reclaimable;
}
-static struct shrinker xfs_inode_shrinker = {
- .shrink = xfs_reclaim_inode_shrink,
- .seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
- init_rwsem(&xfs_mount_list_lock);
- register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
- ASSERT(list_empty(&xfs_mount_list));
- unregister_shrinker(&xfs_inode_shrinker);
-}
-
void
xfs_inode_shrinker_register(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_add_tail(&mp->m_mplist, &xfs_mount_list);
- up_write(&xfs_mount_list_lock);
+ mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+ mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&mp->m_inode_shrink);
}
void
xfs_inode_shrinker_unregister(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_del(&mp->m_mplist);
- up_write(&xfs_mount_list_lock);
+ unregister_shrinker(&mp->m_inode_shrink);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca988..e28139aaa4a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock, int *nr_to_scan);
-void xfs_inode_shrinker_init(void);
-void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa11738..30282069090 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8c117ff2e3a..67c018392d6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(int, gfp_t);
+STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
static struct shrinker xfs_qm_shaker = {
.shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
*/
/* ARGSUSED */
STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t gfp_mask)
{
int ndqused, nfree, n;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4ed..5761087ee8e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
- struct list_head m_mplist; /* inode shrinker mount list */
+ struct shrinker m_inode_shrink; /* inode reclaim shrinker */
} xfs_mount_t;
/*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e287863ac05..de6b1722cdc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -48,6 +48,31 @@ extern ssize_t arch_cpu_release(const char *, size_t);
#endif
struct notifier_block;
+/*
+ * CPU notifier priorities.
+ */
+enum {
+ /*
+ * SCHED_ACTIVE marks a cpu which is coming up active during
+ * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+ * notifier. CPUSET_ACTIVE adjusts cpuset according to
+ * cpu_active mask right after SCHED_ACTIVE. During
+ * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+ * ordered in a similar way.
+ *
+ * This ordering guarantees consistent cpu_active mask and
+ * migration behavior to all cpu notifiers.
+ */
+ CPU_PRI_SCHED_ACTIVE = INT_MAX,
+ CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
+ CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
+ CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+
+ /* migration should happen before other stuff but after perf */
+ CPU_PRI_PERF = 20,
+ CPU_PRI_MIGRATION = 10,
+};
+
#ifdef CONFIG_SMP
/* Need to know about CPUs going up/down? */
#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 457ed765a11..f20eb8f1602 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}
+static inline void cpuset_update_active_cpus(void)
+{
+ partition_sched_domains(1, NULL, NULL);
+}
+
static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 8e5a9dfb76b..e7445df44d6 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -873,6 +873,8 @@ struct fb_info {
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
+ max_num * sizeof(struct aperture), GFP_KERNEL);
+ if (!a)
+ return NULL;
a->count = max_num;
return a;
}
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc529e95..d147461bc27 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -61,7 +61,8 @@ struct files_struct {
(rcu_dereference_check((fdtfd), \
rcu_read_lock_held() || \
lockdep_is_held(&(files)->file_lock) || \
- atomic_read(&(files)->count) == 1))
+ atomic_read(&(files)->count) == 1 || \
+ rcu_my_thread_group_empty()))
#define files_fdtable(files) \
(rcu_dereference_check_fdtable((files), (files)->fdt))
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a4d2e9f7088..adf832dec3f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
struct jbd2_buffer_trigger_type {
/*
- * Fired just before a buffer is written to the journal.
- * mapped_data is a mapped buffer that is the frozen data for
- * commit.
+ * Fired at the moment data to write to the journal are known to be
+ * stable - so either at the moment b_frozen_data is created or just
+ * before a buffer is written to the journal. mapped_data is a mapped
+ * buffer that is the frozen data for commit.
*/
- void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+ void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
struct buffer_head *bh, void *mapped_data,
size_t size);
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
struct buffer_head *bh);
};
-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
void *mapped_data,
struct jbd2_buffer_trigger_type *triggers);
extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b969efb0378..a2b48041b91 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
* querying the cache size, so a fastpath for that case is appropriate.
*/
struct shrinker {
- int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+ int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
int seeks; /* seeks to recreate an obj */
/* These are for internal use */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cb00845f15..f26fda76b87 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -288,6 +288,7 @@ struct pci_dev {
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+ resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
/* These fields are used by common fixups */
unsigned int transparent:1; /* Transparent PCI bridge */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d9498..469e03e96fe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1068,7 +1068,7 @@ static inline void perf_event_disable(struct perf_event *event) { }
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
- { .notifier_call = fn, .priority = 20 }; \
+ { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e0bb86de99..2091ea2a2c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -271,13 +271,10 @@ extern int runqueue_is_locked(int cpu);
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern int select_nohz_load_balancer(int cpu);
-extern int get_nohz_load_balancer(void);
+extern void select_nohz_load_balancer(int stop_tick);
+extern int get_nohz_timer_target(void);
#else
-static inline int select_nohz_load_balancer(int cpu)
-{
- return 0;
-}
+static inline void select_nohz_load_balancer(int stop_tick) { }
#endif
/*
@@ -798,7 +795,7 @@ enum cpu_idle_type {
#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
-
+#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
enum powersavings_balance_level {
@@ -833,6 +830,8 @@ static inline int sd_balance_for_package_power(void)
return SD_PREFER_SIBLING;
}
+extern int __weak arch_sd_sibling_asym_packing(void);
+
/*
* Optimise SD flags for power savings:
* SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
@@ -854,7 +853,7 @@ struct sched_group {
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
* single CPU.
*/
- unsigned int cpu_power;
+ unsigned int cpu_power, cpu_power_orig;
/*
* The CPUs this group covers.
@@ -1690,6 +1689,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
+#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
@@ -1784,20 +1784,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
#endif
/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs; using it (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
*/
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-
-/* ftrace calls sched_clock() directly */
extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline void sched_clock_tick(void)
@@ -1812,17 +1815,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
{
}
#else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
+
extern void sched_clock_tick(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
-
extern unsigned long long
task_sched_runtime(struct task_struct *task);
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index c44df50a05a..b572e432d2f 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
+ | arch_sd_sibling_asym_packing() \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index c9a97597699..814f294d4cd 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -29,6 +29,7 @@
*/
#ifndef LINUX_VGA_H
+#define LINUX_VGA_H
#include <asm/vga.h>
diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h
index fd882261225..9696a5e2c43 100644
--- a/include/math-emu/op-common.h
+++ b/include/math-emu/op-common.h
@@ -799,7 +799,7 @@ do { \
X##_e -= (_FP_W_TYPE_SIZE - rsize); \
X##_e = rsize - X##_e - 1; \
\
- if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e) \
+ if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs <= X##_e) \
__FP_FRAC_SRS_1(ur_, (X##_e - _FP_WFRACBITS_##fs + 1), rsize);\
_FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize); \
if ((_FP_WFRACBITS_##fs - X##_e - 1) > 0) \
diff --git a/include/net/sock.h b/include/net/sock.h
index 731150d5279..0a691ea7654 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
static inline int sk_tx_queue_get(const struct sock *sk)
{
- return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
- return (sk && sk->sk_tx_queue_mapping >= 0);
+ return sk ? sk->sk_tx_queue_mapping : -1;
}
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
diff --git a/ipc/sem.c b/ipc/sem.c
index 506c8491a8d..40a8f462a82 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1256,6 +1256,33 @@ out:
return un;
}
+
+/**
+ * get_queue_result - Retrieve the result code from sem_queue
+ * @q: Pointer to queue structure
+ *
+ * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
+ * q->status, then we must loop until the value is replaced with the final
+ * value: This may happen if a task is woken up by an unrelated event (e.g.
+ * signal) and in parallel the task is woken up by another task because it got
+ * the requested semaphores.
+ *
+ * The function can be called with or without holding the semaphore spinlock.
+ */
+static int get_queue_result(struct sem_queue *q)
+{
+ int error;
+
+ error = q->status;
+ while (unlikely(error == IN_WAKEUP)) {
+ cpu_relax();
+ error = q->status;
+ }
+
+ return error;
+}
+
+
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
unsigned, nsops, const struct timespec __user *, timeout)
{
@@ -1409,15 +1436,18 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
else
schedule();
- error = queue.status;
- while(unlikely(error == IN_WAKEUP)) {
- cpu_relax();
- error = queue.status;
- }
+ error = get_queue_result(&queue);
if (error != -EINTR) {
/* fast path: update_queue already obtained all requested
- * resources */
+ * resources.
+ * Perform a smp_mb(): User space could assume that semop()
+ * is a memory barrier: Without the mb(), the cpu could
+ * speculatively read in user space stale data that was
+ * overwritten by the previous owner of the semaphore.
+ */
+ smp_mb();
+
goto out_free;
}
@@ -1427,10 +1457,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
goto out_free;
}
+ error = get_queue_result(&queue);
+
/*
* If queue.status != -EINTR we are woken up by another process
*/
- error = queue.status;
+
if (error != -EINTR) {
goto out_unlock_free;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 97d1b426a4a..f6e726f1849 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
return -EINVAL;
cpu_hotplug_begin();
- set_cpu_active(cpu, false);
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
- set_cpu_active(cpu, true);
-
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
- set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
goto out_notify;
BUG_ON(!cpu_online(cpu));
- set_cpu_active(cpu, true);
-
/* Now call notifier in preparation. */
cpu_notify(CPU_ONLINE | mod, hcpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611eadd..7146793b5c1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
* but making no active use of cpusets.
*
* This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
*
* Called within get_online_cpus(). Needs to call cgroup_lock()
* before calling generate_sched_domains().
*/
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
- unsigned long phase, void *unused_cpu)
+void cpuset_update_active_cpus(void)
{
struct sched_domain_attr *attr;
cpumask_var_t *doms;
int ndoms;
- switch (phase) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- break;
-
- default:
- return NOTIFY_DONE;
- }
-
cgroup_lock();
mutex_lock(&callback_mutex);
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);
-
- return NOTIFY_OK;
}
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
- hotcpu_notifier(cpuset_track_online_cpus, 0);
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
cpuset_wq = create_singlethread_workqueue("cpuset");
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 31aa9332ef3..7bfae887f21 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -7,6 +7,8 @@
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/early_res.h>
+#include <linux/slab.h>
+#include <linux/kmemleak.h>
/*
* Early reserved memory areas.
@@ -319,6 +321,8 @@ void __init free_early(u64 start, u64 end)
struct early_res *r;
int i;
+ kmemleak_free_part(__va(start), end - start);
+
i = find_overlapped_early(start, end);
r = &early_res[i];
if (i >= max_early_res || r->end != end || r->start != start)
@@ -333,6 +337,8 @@ void __init free_early_partial(u64 start, u64 end)
struct early_res *r;
int i;
+ kmemleak_free_part(__va(start), end - start);
+
if (start == end)
return;
diff --git a/kernel/fork.c b/kernel/fork.c
index b6cce14ba04..a82a65cef74 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
- new_flags &= ~PF_SUPERPRIV;
+ new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
new_flags |= PF_FORKNOEXEC;
new_flags |= PF_STARTING;
p->flags = new_flags;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 5c69e996bd0..e934339fbbe 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
static int hrtimer_get_target(int this_cpu, int pinned)
{
#ifdef CONFIG_NO_HZ
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
- int preferred_cpu = get_nohz_load_balancer();
-
- if (preferred_cpu >= 0)
- return preferred_cpu;
- }
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
+ return get_nohz_timer_target();
#endif
return this_cpu;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 54286798c37..f2852a51023 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
static inline u64 lockstat_clock(void)
{
- return cpu_clock(smp_processor_id());
+ return local_clock();
}
static int lock_point(unsigned long points[], unsigned long ip)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4..7e32b51ff04 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
static inline u64 perf_clock(void)
{
- return cpu_clock(raw_smp_processor_id());
+ return local_clock();
}
/*
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9829646d399..f66bdd33a6c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -232,31 +232,24 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
- struct sighand_struct *sighand;
- struct signal_struct *sig;
+ struct signal_struct *sig = tsk->signal;
struct task_struct *t;
- *times = INIT_CPUTIME;
+ times->utime = sig->utime;
+ times->stime = sig->stime;
+ times->sum_exec_runtime = sig->sum_sched_runtime;
rcu_read_lock();
- sighand = rcu_dereference(tsk->sighand);
- if (!sighand)
+ /* make sure we can trust tsk->thread_group list */
+ if (!likely(pid_alive(tsk)))
goto out;
- sig = tsk->signal;
-
t = tsk;
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
times->sum_exec_runtime += t->se.sum_exec_runtime;
-
- t = next_thread(t);
- } while (t != tsk);
-
- times->utime = cputime_add(times->utime, sig->utime);
- times->stime = cputime_add(times->stime, sig->stime);
- times->sum_exec_runtime += sig->sum_sched_runtime;
+ } while_each_thread(tsk, t);
out:
rcu_read_unlock();
}
@@ -1279,10 +1272,6 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
{
struct signal_struct *sig;
- /* tsk == current, ensure it is safe to use ->signal/sighand */
- if (unlikely(tsk->exit_state))
- return 0;
-
if (!task_cputime_zero(&tsk->cputime_expires)) {
struct task_cputime task_sample = {
.utime = tsk->utime,
@@ -1298,7 +1287,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (sig->cputimer.running) {
struct task_cputime group_sample;
- thread_group_cputimer(tsk, &group_sample);
+ spin_lock(&sig->cputimer.lock);
+ group_sample = sig->cputimer.cputime;
+ spin_unlock(&sig->cputimer.lock);
+
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
@@ -1315,6 +1307,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
{
LIST_HEAD(firing);
struct k_itimer *timer, *next;
+ unsigned long flags;
BUG_ON(!irqs_disabled());
@@ -1325,7 +1318,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
if (!fastpath_timer_check(tsk))
return;
- spin_lock(&tsk->sighand->siglock);
+ if (!lock_task_sighand(tsk, &flags))
+ return;
/*
* Here we take off tsk->signal->cpu_timers[N] and
* tsk->cpu_timers[N] all the timers that are firing, and
@@ -1347,7 +1341,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* that gets the timer lock before we do will give it up and
* spin until we've taken care of that timer below.
*/
- spin_unlock(&tsk->sighand->siglock);
+ unlock_task_sighand(tsk, &flags);
/*
* Now that all the timers on our list have the firing flag,
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 6535ac8bc6a..2e2726d790b 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -239,8 +239,7 @@ static unsigned long
rcu_random(struct rcu_random_state *rrsp)
{
if (--rrsp->rrs_count < 0) {
- rrsp->rrs_state +=
- (unsigned long)cpu_clock(raw_smp_processor_id());
+ rrsp->rrs_state += (unsigned long)local_clock();
rrsp->rrs_count = RCU_RANDOM_REFRESH;
}
rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/sched.c b/kernel/sched.c
index 63b4a14682f..f6c9bb6ac70 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -77,6 +77,7 @@
#include <asm/irq_regs.h>
#include "sched_cpupri.h"
+#include "workqueue_sched.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -456,9 +457,10 @@ struct rq {
unsigned long nr_running;
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+ unsigned long last_load_update_tick;
#ifdef CONFIG_NO_HZ
u64 nohz_stamp;
- unsigned char in_nohz_recently;
+ unsigned char nohz_balance_kick;
#endif
unsigned int skip_clock_update;
@@ -1193,6 +1195,27 @@ static void resched_cpu(int cpu)
#ifdef CONFIG_NO_HZ
/*
+ * In the semi idle case, use the nearest busy cpu for migrating timers
+ * from an idle cpu. This is good for power-savings.
+ *
+ * We don't do similar optimization for completely idle system, as
+ * selecting an idle cpu will add more delays to the timers than intended
+ * (as that cpu's timer base may not be up to date wrt jiffies etc).
+ */
+int get_nohz_timer_target(void)
+{
+ int cpu = smp_processor_id();
+ int i;
+ struct sched_domain *sd;
+
+ for_each_domain(cpu, sd) {
+ for_each_cpu(i, sched_domain_span(sd))
+ if (!idle_cpu(i))
+ return i;
+ }
+ return cpu;
+}
+/*
* When add_timer_on() enqueues a timer into the timer wheel of an
* idle CPU then this timer might expire before the next timer event
* which is scheduled to wake up that CPU. In case of a completely
@@ -1642,7 +1665,7 @@ static void update_shares(struct sched_domain *sd)
if (root_task_group_empty())
return;
- now = cpu_clock(raw_smp_processor_id());
+ now = local_clock();
elapsed = now - sd->last_update;
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
@@ -1795,6 +1818,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
static void calc_load_account_idle(struct rq *this_rq);
static void update_sysctl(void);
static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
@@ -2257,11 +2281,55 @@ static void update_avg(u64 *avg, u64 sample)
}
#endif
-/***
+static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+ bool is_sync, bool is_migrate, bool is_local,
+ unsigned long en_flags)
+{
+ schedstat_inc(p, se.statistics.nr_wakeups);
+ if (is_sync)
+ schedstat_inc(p, se.statistics.nr_wakeups_sync);
+ if (is_migrate)
+ schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+ if (is_local)
+ schedstat_inc(p, se.statistics.nr_wakeups_local);
+ else
+ schedstat_inc(p, se.statistics.nr_wakeups_remote);
+
+ activate_task(rq, p, en_flags);
+}
+
+static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
+ int wake_flags, bool success)
+{
+ trace_sched_wakeup(p, success);
+ check_preempt_curr(rq, p, wake_flags);
+
+ p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_woken)
+ p->sched_class->task_woken(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+ u64 max = 2*sysctl_sched_migration_cost;
+
+ if (delta > max)
+ rq->avg_idle = max;
+ else
+ update_avg(&rq->avg_idle, delta);
+ rq->idle_stamp = 0;
+ }
+#endif
+ /* if a worker is waking up, notify workqueue */
+ if ((p->flags & PF_WQ_WORKER) && success)
+ wq_worker_waking_up(p, cpu_of(rq));
+}
+
+/**
* try_to_wake_up - wake up a thread
- * @p: the to-be-woken-up thread
+ * @p: the thread to be awakened
* @state: the mask of task states that can be woken
- * @sync: do a synchronous wakeup?
+ * @wake_flags: wake modifier flags (WF_*)
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
@@ -2269,7 +2337,8 @@ static void update_avg(u64 *avg, u64 sample)
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
- * returns failure only if the task is already active.
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
*/
static int try_to_wake_up(struct task_struct *p, unsigned int state,
int wake_flags)
@@ -2349,38 +2418,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
out_activate:
#endif /* CONFIG_SMP */
- schedstat_inc(p, se.statistics.nr_wakeups);
- if (wake_flags & WF_SYNC)
- schedstat_inc(p, se.statistics.nr_wakeups_sync);
- if (orig_cpu != cpu)
- schedstat_inc(p, se.statistics.nr_wakeups_migrate);
- if (cpu == this_cpu)
- schedstat_inc(p, se.statistics.nr_wakeups_local);
- else
- schedstat_inc(p, se.statistics.nr_wakeups_remote);
- activate_task(rq, p, en_flags);
+ ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+ cpu == this_cpu, en_flags);
success = 1;
-
out_running:
- trace_sched_wakeup(p, success);
- check_preempt_curr(rq, p, wake_flags);
-
- p->state = TASK_RUNNING;
-#ifdef CONFIG_SMP
- if (p->sched_class->task_woken)
- p->sched_class->task_woken(rq, p);
-
- if (unlikely(rq->idle_stamp)) {
- u64 delta = rq->clock - rq->idle_stamp;
- u64 max = 2*sysctl_sched_migration_cost;
-
- if (delta > max)
- rq->avg_idle = max;
- else
- update_avg(&rq->avg_idle, delta);
- rq->idle_stamp = 0;
- }
-#endif
+ ttwu_post_activation(p, rq, wake_flags, success);
out:
task_rq_unlock(rq, &flags);
put_cpu();
@@ -2389,6 +2431,37 @@ out:
}
/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task. this_rq() stays locked over invocation.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+ bool success = false;
+
+ BUG_ON(rq != this_rq());
+ BUG_ON(p == current);
+ lockdep_assert_held(&rq->lock);
+
+ if (!(p->state & TASK_NORMAL))
+ return;
+
+ if (!p->se.on_rq) {
+ if (likely(!task_running(rq, p))) {
+ schedstat_inc(rq, ttwu_count);
+ schedstat_inc(rq, ttwu_local);
+ }
+ ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
+ success = true;
+ }
+ ttwu_post_activation(p, rq, 0, success);
+}
+
+/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -3002,23 +3075,102 @@ static void calc_load_account_active(struct rq *this_rq)
}
/*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on nth tick when cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to degradation factor for a power of two ticks,
+ * based on 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
+ *
+ * With this power of 2 load factors, we can degrade the load n times
+ * by looking at 1 bits in n and doing as many mult/shift instead of
+ * n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT 7
+static const unsigned char
+ degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+ degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ {64, 32, 8, 0, 0, 0, 0, 0},
+ {96, 72, 40, 12, 1, 0, 0},
+ {112, 98, 75, 43, 15, 1, 0},
+ {120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+ int j = 0;
+
+ if (!missed_updates)
+ return load;
+
+ if (missed_updates >= degrade_zero_ticks[idx])
+ return 0;
+
+ if (idx == 1)
+ return load >> missed_updates;
+
+ while (missed_updates) {
+ if (missed_updates % 2)
+ load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+ missed_updates >>= 1;
+ j++;
+ }
+ return load;
+}
+
+/*
* Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
*/
static void update_cpu_load(struct rq *this_rq)
{
unsigned long this_load = this_rq->load.weight;
+ unsigned long curr_jiffies = jiffies;
+ unsigned long pending_updates;
int i, scale;
this_rq->nr_load_updates++;
+ /* Avoid repeated calls on same jiffy, when moving in and out of idle */
+ if (curr_jiffies == this_rq->last_load_update_tick)
+ return;
+
+ pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+ this_rq->last_load_update_tick = curr_jiffies;
+
/* Update our load: */
- for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+ this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+ for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
unsigned long old_load, new_load;
/* scale is effectively 1 << i now, and >> i divides by scale */
old_load = this_rq->cpu_load[i];
+ old_load = decay_load_missed(old_load, pending_updates - 1, i);
new_load = this_load;
/*
* Round up the averaging division if load is increasing. This
@@ -3026,9 +3178,15 @@ static void update_cpu_load(struct rq *this_rq)
* example.
*/
if (new_load > old_load)
- new_load += scale-1;
- this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+ new_load += scale - 1;
+
+ this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
}
+}
+
+static void update_cpu_load_active(struct rq *this_rq)
+{
+ update_cpu_load(this_rq);
calc_load_account_active(this_rq);
}
@@ -3416,7 +3574,7 @@ void scheduler_tick(void)
raw_spin_lock(&rq->lock);
update_rq_clock(rq);
- update_cpu_load(rq);
+ update_cpu_load_active(rq);
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
@@ -3588,7 +3746,6 @@ need_resched:
rq = cpu_rq(cpu);
rcu_note_context_switch(cpu);
prev = rq->curr;
- switch_count = &prev->nivcsw;
release_kernel_lock(prev);
need_resched_nonpreemptible:
@@ -3601,11 +3758,26 @@ need_resched_nonpreemptible:
raw_spin_lock_irq(&rq->lock);
clear_tsk_need_resched(prev);
+ switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
- if (unlikely(signal_pending_state(prev->state, prev)))
+ if (unlikely(signal_pending_state(prev->state, prev))) {
prev->state = TASK_RUNNING;
- else
+ } else {
+ /*
+ * If a worker is going to sleep, notify and
+ * ask workqueue whether it wants to wake up a
+ * task to maintain concurrency. If so, wake
+ * up the task.
+ */
+ if (prev->flags & PF_WQ_WORKER) {
+ struct task_struct *to_wakeup;
+
+ to_wakeup = wq_worker_sleeping(prev, cpu);
+ if (to_wakeup)
+ try_to_wake_up_local(to_wakeup);
+ }
deactivate_task(rq, prev, DEQUEUE_SLEEP);
+ }
switch_count = &prev->nvcsw;
}
@@ -3627,8 +3799,10 @@ need_resched_nonpreemptible:
context_switch(rq, prev, next); /* unlocks the rq */
/*
- * the context switch might have flipped the stack from under
- * us, hence refresh the local variables.
+ * The context switch has flipped the stack from under us
+ * and restored the local variables which were saved when
+ * this task called schedule() in the past. prev == current
+ * is still correct, but it can be moved to another cpu/rq.
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
@@ -3637,11 +3811,8 @@ need_resched_nonpreemptible:
post_schedule(rq);
- if (unlikely(reacquire_kernel_lock(current) < 0)) {
- prev = rq->curr;
- switch_count = &prev->nivcsw;
+ if (unlikely(reacquire_kernel_lock(prev)))
goto need_resched_nonpreemptible;
- }
preempt_enable_no_resched();
if (need_resched())
@@ -4431,12 +4602,8 @@ recheck:
*/
if (user && !capable(CAP_SYS_NICE)) {
if (rt_policy(policy)) {
- unsigned long rlim_rtprio;
-
- if (!lock_task_sighand(p, &flags))
- return -ESRCH;
- rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
- unlock_task_sighand(p, &flags);
+ unsigned long rlim_rtprio =
+ task_rlimit(p, RLIMIT_RTPRIO);
/* can't set/change the rt policy */
if (policy != p->policy && !rlim_rtprio)
@@ -5806,20 +5973,49 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
*/
static struct notifier_block __cpuinitdata migration_notifier = {
.notifier_call = migration_call,
- .priority = 10
+ .priority = CPU_PRI_MIGRATION,
};
+static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+ set_cpu_active((long)hcpu, true);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ set_cpu_active((long)hcpu, false);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int __init migration_init(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err;
- /* Start one for the boot CPU: */
+ /* Initialize migration for the boot CPU */
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
BUG_ON(err == NOTIFY_BAD);
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
+ /* Register cpu active notifiers */
+ cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+ cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
+
return 0;
}
early_initcall(migration_init);
@@ -6054,23 +6250,18 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
free_rootdomain(old_rd);
}
-static int init_rootdomain(struct root_domain *rd, bool bootmem)
+static int init_rootdomain(struct root_domain *rd)
{
- gfp_t gfp = GFP_KERNEL;
-
memset(rd, 0, sizeof(*rd));
- if (bootmem)
- gfp = GFP_NOWAIT;
-
- if (!alloc_cpumask_var(&rd->span, gfp))
+ if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
goto out;
- if (!alloc_cpumask_var(&rd->online, gfp))
+ if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
goto free_span;
- if (!alloc_cpumask_var(&rd->rto_mask, gfp))
+ if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
goto free_online;
- if (cpupri_init(&rd->cpupri, bootmem) != 0)
+ if (cpupri_init(&rd->cpupri) != 0)
goto free_rto_mask;
return 0;
@@ -6086,7 +6277,7 @@ out:
static void init_defrootdomain(void)
{
- init_rootdomain(&def_root_domain, true);
+ init_rootdomain(&def_root_domain);
atomic_set(&def_root_domain.refcount, 1);
}
@@ -6099,7 +6290,7 @@ static struct root_domain *alloc_rootdomain(void)
if (!rd)
return NULL;
- if (init_rootdomain(rd, false) != 0) {
+ if (init_rootdomain(rd) != 0) {
kfree(rd);
return NULL;
}
@@ -7278,29 +7469,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-#ifndef CONFIG_CPUSETS
/*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Update cpusets according to cpu_active mask. If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
*/
-static int update_sched_domains(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- partition_sched_domains(1, NULL, NULL);
+ cpuset_update_active_cpus();
return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ cpuset_update_active_cpus();
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
}
-#endif
static int update_runtime(struct notifier_block *nfb,
unsigned long action, void *hcpu)
@@ -7346,10 +7543,8 @@ void __init sched_init_smp(void)
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
-#ifndef CONFIG_CPUSETS
- /* XXX: Theoretical race here - CPU may be hotplugged now */
- hotcpu_notifier(update_sched_domains, 0);
-#endif
+ hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+ hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
/* RT runtime code needs to handle some hotplug events */
hotcpu_notifier(update_runtime, 0);
@@ -7594,6 +7789,9 @@ void __init sched_init(void)
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
+
+ rq->last_load_update_tick = jiffies;
+
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
@@ -7607,6 +7805,10 @@ void __init sched_init(void)
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq_attach_root(rq, &def_root_domain);
+#ifdef CONFIG_NO_HZ
+ rq->nohz_balance_kick = 0;
+ init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
+#endif
#endif
init_rq_hrtick(rq);
atomic_set(&rq->nr_iowait, 0);
@@ -7651,8 +7853,11 @@ void __init sched_init(void)
zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ
- zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
- alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+ alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
+ atomic_set(&nohz.load_balancer, nr_cpu_ids);
+ atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
+ atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
#endif
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 906a0f718cb..52f1a149bfb 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
* Ingo Molnar <mingo@redhat.com>
* Guillaume Chazarain <guichaz@gmail.com>
*
- * Create a semi stable clock from a mixture of other events, including:
- * - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !! #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i) -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock() -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
+ * sched_clock() is assumed to provide these properties (mostly it means
+ * the architecture provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ * - GTOD (clock monotonic)
* - sched_clock()
* - explicit idle events
*
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and to keep it within an
+ * expected window.
*
* Furthermore, explicit sleep and wakeup hooks allow us to account for time
* that is otherwise invisible (TSC gets stopped).
*
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
*/
#include <linux/spinlock.h>
#include <linux/hardirq.h>
@@ -170,6 +206,11 @@ again:
return val;
}
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
u64 sched_clock_cpu(int cpu)
{
struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !! #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
{
- unsigned long long clock;
+ u64 clock;
unsigned long flags;
local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
return clock;
}
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+ u64 clock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ clock = sched_clock_cpu(smp_processor_id());
+ local_irq_restore(flags);
+
+ return clock;
+}
+
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
return sched_clock();
}
-
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
{
return sched_clock_cpu(cpu);
}
+u64 local_clock(void)
+{
+ return sched_clock_cpu(0);
+}
+
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index e6871cb3fc8..2722dc1b413 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -166,14 +166,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
*
* Returns: -ENOMEM if memory fails.
*/
-int cpupri_init(struct cpupri *cp, bool bootmem)
+int cpupri_init(struct cpupri *cp)
{
- gfp_t gfp = GFP_KERNEL;
int i;
- if (bootmem)
- gfp = GFP_NOWAIT;
-
memset(cp, 0, sizeof(*cp));
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -181,7 +177,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
raw_spin_lock_init(&vec->lock);
vec->count = 0;
- if (!zalloc_cpumask_var(&vec->mask, gfp))
+ if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
goto cleanup;
}
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index 7cb5bb6b95b..9fc7d386fea 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -27,7 +27,7 @@ struct cpupri {
int cpupri_find(struct cpupri *cp,
struct task_struct *p, struct cpumask *lowest_mask);
void cpupri_set(struct cpupri *cp, int cpu, int pri);
-int cpupri_init(struct cpupri *cp, bool bootmem);
+int cpupri_init(struct cpupri *cp);
void cpupri_cleanup(struct cpupri *cp);
#else
#define cpupri_set(cp, cpu, pri) do { } while (0)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 35565395d00..2e1b0d17dd9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -332,7 +332,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
- PN(sysctl_sched_child_runs_first);
+ P(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
#undef PN
#undef P
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a878b5332da..806d1b227a2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2287,13 +2287,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
unsigned long power = SCHED_LOAD_SCALE;
struct sched_group *sdg = sd->groups;
- if (sched_feat(ARCH_POWER))
- power *= arch_scale_freq_power(sd, cpu);
- else
- power *= default_scale_freq_power(sd, cpu);
-
- power >>= SCHED_LOAD_SHIFT;
-
if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
if (sched_feat(ARCH_POWER))
power *= arch_scale_smt_power(sd, cpu);
@@ -2303,6 +2296,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
power >>= SCHED_LOAD_SHIFT;
}
+ sdg->cpu_power_orig = power;
+
+ if (sched_feat(ARCH_POWER))
+ power *= arch_scale_freq_power(sd, cpu);
+ else
+ power *= default_scale_freq_power(sd, cpu);
+
+ power >>= SCHED_LOAD_SHIFT;
+
power *= scale_rt_power(cpu);
power >>= SCHED_LOAD_SHIFT;
@@ -2335,6 +2337,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
sdg->cpu_power = power;
}
+/*
+ * Try and fix up capacity for tiny siblings, this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough.
+ *
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+ /*
+ * Only siblings can have significantly less than SCHED_LOAD_SCALE
+ */
+ if (sd->level != SD_LV_SIBLING)
+ return 0;
+
+ /*
+ * If ~90% of the cpu_power is still there, we're good.
+ */
+ if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+ return 1;
+
+ return 0;
+}
+
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @sd: The sched_domain whose statistics are to be updated.
@@ -2400,14 +2427,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
* domains. In the newly idle case, we will allow all the cpu's
* to do the newly idle load balance.
*/
- if (idle != CPU_NEWLY_IDLE && local_group &&
- balance_cpu != this_cpu) {
- *balance = 0;
- return;
+ if (idle != CPU_NEWLY_IDLE && local_group) {
+ if (balance_cpu != this_cpu) {
+ *balance = 0;
+ return;
+ }
+ update_group_power(sd, this_cpu);
}
- update_group_power(sd, this_cpu);
-
/* Adjust by relative CPU power of the group */
sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
@@ -2428,6 +2455,51 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_capacity =
DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+ if (!sgs->group_capacity)
+ sgs->group_capacity = fix_small_capacity(sd, group);
+}
+
+/**
+ * update_sd_pick_busiest - return 1 on busiest group
+ * @sd: sched_domain whose statistics are to be checked
+ * @sds: sched_domain statistics
+ * @sg: sched_group candidate to be checked for being the busiest
+ * @sgs: sched_group statistics
+ * @this_cpu: the current cpu
+ *
+ * Determine if @sg is a busier group than the previously selected
+ * busiest group.
+ */
+static bool update_sd_pick_busiest(struct sched_domain *sd,
+ struct sd_lb_stats *sds,
+ struct sched_group *sg,
+ struct sg_lb_stats *sgs,
+ int this_cpu)
+{
+ if (sgs->avg_load <= sds->max_load)
+ return false;
+
+ if (sgs->sum_nr_running > sgs->group_capacity)
+ return true;
+
+ if (sgs->group_imb)
+ return true;
+
+ /*
+ * ASYM_PACKING needs to move all the work to the lowest
+ * numbered CPUs in the group, therefore mark all groups
+ * higher than ourself as busy.
+ */
+ if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
+ this_cpu < group_first_cpu(sg)) {
+ if (!sds->busiest)
+ return true;
+
+ if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+ return true;
+ }
+
+ return false;
}
/**
@@ -2435,7 +2507,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
* @sd: sched_domain whose statistics are to be updated.
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing group.
+ * @sd_idle: Idle status of the sched_domain containing sg.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
* @sds: variable to hold the statistics for this sched_domain.
@@ -2446,7 +2518,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
struct sd_lb_stats *sds)
{
struct sched_domain *child = sd->child;
- struct sched_group *group = sd->groups;
+ struct sched_group *sg = sd->groups;
struct sg_lb_stats sgs;
int load_idx, prefer_sibling = 0;
@@ -2459,21 +2531,20 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
do {
int local_group;
- local_group = cpumask_test_cpu(this_cpu,
- sched_group_cpus(group));
+ local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
+ update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
local_group, cpus, balance, &sgs);
if (local_group && !(*balance))
return;
sds->total_load += sgs.group_load;
- sds->total_pwr += group->cpu_power;
+ sds->total_pwr += sg->cpu_power;
/*
* In case the child domain prefers tasks go to siblings
- * first, lower the group capacity to one so that we'll try
+ * first, lower the sg capacity to one so that we'll try
* and move all the excess tasks away.
*/
if (prefer_sibling)
@@ -2481,23 +2552,72 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
if (local_group) {
sds->this_load = sgs.avg_load;
- sds->this = group;
+ sds->this = sg;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
- } else if (sgs.avg_load > sds->max_load &&
- (sgs.sum_nr_running > sgs.group_capacity ||
- sgs.group_imb)) {
+ } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
sds->max_load = sgs.avg_load;
- sds->busiest = group;
+ sds->busiest = sg;
sds->busiest_nr_running = sgs.sum_nr_running;
sds->busiest_group_capacity = sgs.group_capacity;
sds->busiest_load_per_task = sgs.sum_weighted_load;
sds->group_imb = sgs.group_imb;
}
- update_sd_power_savings_stats(group, sds, local_group, &sgs);
- group = group->next;
- } while (group != sd->groups);
+ update_sd_power_savings_stats(sg, sds, local_group, &sgs);
+ sg = sg->next;
+ } while (sg != sd->groups);
+}
+
+int __weak arch_sd_sibling_asym_packing(void)
+{
+ return 0*SD_ASYM_PACKING;
+}
+
+/**
+ * check_asym_packing - Check to see if the group is packed into the
+ * sched domain.
+ *
+ * This is primarily intended to be used at the sibling level. Some
+ * cores like POWER7 prefer to use lower numbered SMT threads. In the
+ * case of POWER7, it can move to lower SMT modes only when higher
+ * threads are idle. When in lower SMT modes, the threads will
+ * perform better since they share less core resources. Hence when we
+ * have idle threads, we want them to be the higher ones.
+ *
+ * This packing function is run on idle threads. It checks to see if
+ * the busiest CPU in this domain (core in the P7 case) has a higher
+ * CPU number than the packing function is being run on. Here we are
+ * assuming lower CPU number will be equivalent to a lower SMT thread
+ * number.
+ *
+ * Returns 1 when packing is required and a task should be moved to
+ * this CPU. The amount of the imbalance is returned in *imbalance.
+ *
+ * @sd: The sched_domain whose packing is to be checked.
+ * @sds: Statistics of the sched_domain which is to be packed
+ * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+ * @imbalance: returns amount of imbalance due to packing.
+ */
+static int check_asym_packing(struct sched_domain *sd,
+ struct sd_lb_stats *sds,
+ int this_cpu, unsigned long *imbalance)
+{
+ int busiest_cpu;
+
+ if (!(sd->flags & SD_ASYM_PACKING))
+ return 0;
+
+ if (!sds->busiest)
+ return 0;
+
+ busiest_cpu = group_first_cpu(sds->busiest);
+ if (this_cpu > busiest_cpu)
+ return 0;
+
+ *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+ SCHED_LOAD_SCALE);
+ return 1;
}
/**
@@ -2692,6 +2812,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (!(*balance))
goto ret;
+ if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
+ check_asym_packing(sd, &sds, this_cpu, imbalance))
+ return sds.busiest;
+
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
@@ -2726,8 +2850,9 @@ ret:
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
static struct rq *
-find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
- unsigned long imbalance, const struct cpumask *cpus)
+find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
+ enum cpu_idle_type idle, unsigned long imbalance,
+ const struct cpumask *cpus)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
@@ -2738,6 +2863,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
unsigned long wl;
+ if (!capacity)
+ capacity = fix_small_capacity(sd, group);
+
if (!cpumask_test_cpu(i, cpus))
continue;
@@ -2777,9 +2905,19 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
+static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+ int busiest_cpu, int this_cpu)
{
if (idle == CPU_NEWLY_IDLE) {
+
+ /*
+ * ASYM_PACKING needs to force migrate tasks from busy but
+ * higher numbered CPUs in order to pack all tasks in the
+ * lowest numbered CPUs.
+ */
+ if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
+ return 1;
+
/*
* The only task running in a non-idle cpu can be moved to this
* cpu in an attempt to completely freeup the other CPU
@@ -2854,7 +2992,7 @@ redo:
goto out_balanced;
}
- busiest = find_busiest_queue(group, idle, imbalance, cpus);
+ busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced;
@@ -2898,7 +3036,8 @@ redo:
schedstat_inc(sd, lb_failed[idle]);
sd->nr_balance_failed++;
- if (need_active_balance(sd, sd_idle, idle)) {
+ if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
+ this_cpu)) {
raw_spin_lock_irqsave(&busiest->lock, flags);
/* don't kick the active_load_balance_cpu_stop,
@@ -3093,13 +3232,40 @@ out_unlock:
}
#ifdef CONFIG_NO_HZ
+
+static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb);
+
+static void trigger_sched_softirq(void *data)
+{
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+}
+
+static inline void init_sched_softirq_csd(struct call_single_data *csd)
+{
+ csd->func = trigger_sched_softirq;
+ csd->info = NULL;
+ csd->flags = 0;
+ csd->priv = 0;
+}
+
+/*
+ * idle load balancing details
+ * - One of the idle CPUs nominates itself as idle load_balancer, while
+ * entering idle.
+ * - This idle load balancer CPU will also go into tickless mode when
+ * it is idle, just like all other idle CPUs
+ * - When one of the busy CPUs notices that there may be an idle rebalancing
+ * needed, it will kick the idle load balancer, which then does idle
+ * load balancing for all the idle CPUs.
+ */
static struct {
atomic_t load_balancer;
- cpumask_var_t cpu_mask;
- cpumask_var_t ilb_grp_nohz_mask;
-} nohz ____cacheline_aligned = {
- .load_balancer = ATOMIC_INIT(-1),
-};
+ atomic_t first_pick_cpu;
+ atomic_t second_pick_cpu;
+ cpumask_var_t idle_cpus_mask;
+ cpumask_var_t grp_idle_mask;
+ unsigned long next_balance; /* in jiffy units */
+} nohz ____cacheline_aligned;
int get_nohz_load_balancer(void)
{
@@ -3153,17 +3319,17 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
*/
static inline int is_semi_idle_group(struct sched_group *ilb_group)
{
- cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+ cpumask_and(nohz.grp_idle_mask, nohz.idle_cpus_mask,
sched_group_cpus(ilb_group));
/*
* A sched_group is semi-idle when it has atleast one busy cpu
* and atleast one idle cpu.
*/
- if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+ if (cpumask_empty(nohz.grp_idle_mask))
return 0;
- if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+ if (cpumask_equal(nohz.grp_idle_mask, sched_group_cpus(ilb_group)))
return 0;
return 1;
@@ -3196,7 +3362,7 @@ static int find_new_ilb(int cpu)
* Optimize for the case when we have no idle CPUs or only one
* idle CPU. Don't walk the sched_domain hierarchy in such cases
*/
- if (cpumask_weight(nohz.cpu_mask) < 2)
+ if (cpumask_weight(nohz.idle_cpus_mask) < 2)
goto out_done;
for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
@@ -3204,7 +3370,7 @@ static int find_new_ilb(int cpu)
do {
if (is_semi_idle_group(ilb_group))
- return cpumask_first(nohz.ilb_grp_nohz_mask);
+ return cpumask_first(nohz.grp_idle_mask);
ilb_group = ilb_group->next;
@@ -3212,98 +3378,116 @@ static int find_new_ilb(int cpu)
}
out_done:
- return cpumask_first(nohz.cpu_mask);
+ return nr_cpu_ids;
}
#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
static inline int find_new_ilb(int call_cpu)
{
- return cpumask_first(nohz.cpu_mask);
+ return nr_cpu_ids;
}
#endif
/*
+ * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
+ * nohz_load_balancer CPU (if there is one) otherwise fall back to any idle
+ * CPU (if there is one).
+ */
+static void nohz_balancer_kick(int cpu)
+{
+ int ilb_cpu;
+
+ nohz.next_balance++;
+
+ ilb_cpu = get_nohz_load_balancer();
+
+ if (ilb_cpu >= nr_cpu_ids) {
+ ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
+ if (ilb_cpu >= nr_cpu_ids)
+ return;
+ }
+
+ if (!cpu_rq(ilb_cpu)->nohz_balance_kick) {
+ struct call_single_data *cp;
+
+ cpu_rq(ilb_cpu)->nohz_balance_kick = 1;
+ cp = &per_cpu(remote_sched_softirq_cb, cpu);
+ __smp_call_function_single(ilb_cpu, cp, 0);
+ }
+ return;
+}
+
+/*
* This routine will try to nominate the ilb (idle load balancing)
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
- * load balancing on behalf of all those cpus. If all the cpus in the system
- * go into this tickless mode, then there will be no ilb owner (as there is
- * no need for one) and all the cpus will sleep till the next wakeup event
- * arrives...
- *
- * For the ilb owner, tick is not stopped. And this tick will be used
- * for idle load balancing. ilb owner will still be part of
- * nohz.cpu_mask..
+ * load balancing on behalf of all those cpus.
*
- * While stopping the tick, this cpu will become the ilb owner if there
- * is no other owner. And will be the owner till that cpu becomes busy
- * or if all cpus in the system stop their ticks at which point
- * there is no need for ilb owner.
+ * When the ilb owner becomes busy, we will not have a new ilb owner until
+ * some idle CPU wakes up and goes back to idle or some busy CPU tries to
+ * kick idle load balancing by kicking one of the idle CPUs.
*
- * When the ilb owner becomes busy, it nominates another owner, during the
- * next busy scheduler_tick()
+ * Ticks are stopped for the ilb owner as well, with a busy CPU kicking this
+ * ilb owner CPU in the future (when there is a need for idle load balancing
+ * on behalf of all idle CPUs).
*/
-int select_nohz_load_balancer(int stop_tick)
+void select_nohz_load_balancer(int stop_tick)
{
int cpu = smp_processor_id();
if (stop_tick) {
- cpu_rq(cpu)->in_nohz_recently = 1;
-
if (!cpu_active(cpu)) {
if (atomic_read(&nohz.load_balancer) != cpu)
- return 0;
+ return;
/*
* If we are going offline and still the leader,
* give up!
*/
- if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+ if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+ nr_cpu_ids) != cpu)
BUG();
- return 0;
+ return;
}
- cpumask_set_cpu(cpu, nohz.cpu_mask);
+ cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
- /* time for ilb owner also to sleep */
- if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
- if (atomic_read(&nohz.load_balancer) == cpu)
- atomic_set(&nohz.load_balancer, -1);
- return 0;
- }
+ if (atomic_read(&nohz.first_pick_cpu) == cpu)
+ atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
+ if (atomic_read(&nohz.second_pick_cpu) == cpu)
+ atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
- if (atomic_read(&nohz.load_balancer) == -1) {
- /* make me the ilb owner */
- if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
- return 1;
- } else if (atomic_read(&nohz.load_balancer) == cpu) {
+ if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
int new_ilb;
- if (!(sched_smt_power_savings ||
- sched_mc_power_savings))
- return 1;
+ /* make me the ilb owner */
+ if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
+ cpu) != nr_cpu_ids)
+ return;
+
/*
* Check to see if there is a more power-efficient
* ilb.
*/
new_ilb = find_new_ilb(cpu);
if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
- atomic_set(&nohz.load_balancer, -1);
+ atomic_set(&nohz.load_balancer, nr_cpu_ids);
resched_cpu(new_ilb);
- return 0;
+ return;
}
- return 1;
+ return;
}
} else {
- if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
- return 0;
+ if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+ return;
- cpumask_clear_cpu(cpu, nohz.cpu_mask);
+ cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
if (atomic_read(&nohz.load_balancer) == cpu)
- if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+ if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+ nr_cpu_ids) != cpu)
BUG();
}
- return 0;
+ return;
}
#endif
@@ -3385,11 +3569,102 @@ out:
rq->next_balance = next_balance;
}
+#ifdef CONFIG_NO_HZ
/*
- * run_rebalance_domains is triggered when needed from the scheduler tick.
- * In CONFIG_NO_HZ case, the idle load balance owner will do the
+ * In CONFIG_NO_HZ case, the idle balance kickee will do the
* rebalancing for all the cpus for whom scheduler ticks are stopped.
*/
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
+{
+ struct rq *this_rq = cpu_rq(this_cpu);
+ struct rq *rq;
+ int balance_cpu;
+
+ if (idle != CPU_IDLE || !this_rq->nohz_balance_kick)
+ return;
+
+ for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+ if (balance_cpu == this_cpu)
+ continue;
+
+ /*
+ * If this cpu gets work to do, stop the load balancing
+ * work being done for other cpus. Next load
+ * balancing owner will pick it up.
+ */
+ if (need_resched()) {
+ this_rq->nohz_balance_kick = 0;
+ break;
+ }
+
+ raw_spin_lock_irq(&this_rq->lock);
+ update_rq_clock(this_rq);
+ update_cpu_load(this_rq);
+ raw_spin_unlock_irq(&this_rq->lock);
+
+ rebalance_domains(balance_cpu, CPU_IDLE);
+
+ rq = cpu_rq(balance_cpu);
+ if (time_after(this_rq->next_balance, rq->next_balance))
+ this_rq->next_balance = rq->next_balance;
+ }
+ nohz.next_balance = this_rq->next_balance;
+ this_rq->nohz_balance_kick = 0;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer
+ * - first_pick_cpu is one of the busy CPUs. It will kick
+ * idle load balancer when it has more than one process active. This
+ * eliminates the need for idle load balancing altogether when we have
+ * only one running process in the system (common case).
+ * - If there is more than one busy CPU, idle load balancer may have
+ * to run for active_load_balance to happen (i.e., two busy CPUs are
+ * SMT or core siblings and can run better if they move to different
+ * physical CPUs). So, second_pick_cpu is the second of the busy CPUs
+ * which will kick idle load balancer as soon as it has any load.
+ */
+static inline int nohz_kick_needed(struct rq *rq, int cpu)
+{
+ unsigned long now = jiffies;
+ int ret;
+ int first_pick_cpu, second_pick_cpu;
+
+ if (time_before(now, nohz.next_balance))
+ return 0;
+
+ if (!rq->nr_running)
+ return 0;
+
+ first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
+ second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
+
+ if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
+ second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
+ return 0;
+
+ ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
+ if (ret == nr_cpu_ids || ret == cpu) {
+ atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
+ if (rq->nr_running > 1)
+ return 1;
+ } else {
+ ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
+ if (ret == nr_cpu_ids || ret == cpu) {
+ if (rq->nr_running)
+ return 1;
+ }
+ }
+ return 0;
+}
+#else
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
+#endif
+
+/*
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
+ * Also triggered for nohz idle balancing (with nohz_balance_kick set).
+ */
static void run_rebalance_domains(struct softirq_action *h)
{
int this_cpu = smp_processor_id();
@@ -3399,37 +3674,12 @@ static void run_rebalance_domains(struct softirq_action *h)
rebalance_domains(this_cpu, idle);
-#ifdef CONFIG_NO_HZ
/*
- * If this cpu is the owner for idle load balancing, then do the
+ * If this cpu has a pending nohz_balance_kick, then do the
* balancing on behalf of the other idle cpus whose ticks are
* stopped.
*/
- if (this_rq->idle_at_tick &&
- atomic_read(&nohz.load_balancer) == this_cpu) {
- struct rq *rq;
- int balance_cpu;
-
- for_each_cpu(balance_cpu, nohz.cpu_mask) {
- if (balance_cpu == this_cpu)
- continue;
-
- /*
- * If this cpu gets work to do, stop the load balancing
- * work being done for other cpus. Next load
- * balancing owner will pick it up.
- */
- if (need_resched())
- break;
-
- rebalance_domains(balance_cpu, CPU_IDLE);
-
- rq = cpu_rq(balance_cpu);
- if (time_after(this_rq->next_balance, rq->next_balance))
- this_rq->next_balance = rq->next_balance;
- }
- }
-#endif
+ nohz_idle_balance(this_cpu, idle);
}
static inline int on_null_domain(int cpu)
@@ -3439,57 +3689,17 @@ static inline int on_null_domain(int cpu)
/*
* Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
- *
- * In case of CONFIG_NO_HZ, this is the place where we nominate a new
- * idle load balancing owner or decide to stop the periodic load balancing,
- * if the whole system is idle.
*/
static inline void trigger_load_balance(struct rq *rq, int cpu)
{
-#ifdef CONFIG_NO_HZ
- /*
- * If we were in the nohz mode recently and busy at the current
- * scheduler tick, then check if we need to nominate new idle
- * load balancer.
- */
- if (rq->in_nohz_recently && !rq->idle_at_tick) {
- rq->in_nohz_recently = 0;
-
- if (atomic_read(&nohz.load_balancer) == cpu) {
- cpumask_clear_cpu(cpu, nohz.cpu_mask);
- atomic_set(&nohz.load_balancer, -1);
- }
-
- if (atomic_read(&nohz.load_balancer) == -1) {
- int ilb = find_new_ilb(cpu);
-
- if (ilb < nr_cpu_ids)
- resched_cpu(ilb);
- }
- }
-
- /*
- * If this cpu is idle and doing idle load balancing for all the
- * cpus with ticks stopped, is it time for that to stop?
- */
- if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
- cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
- resched_cpu(cpu);
- return;
- }
-
- /*
- * If this cpu is idle and the idle load balancing is done by
- * someone else, then no need raise the SCHED_SOFTIRQ
- */
- if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
- cpumask_test_cpu(cpu, nohz.cpu_mask))
- return;
-#endif
/* Don't need to rebalance while attached to NULL domain */
if (time_after_eq(jiffies, rq->next_balance) &&
likely(!on_null_domain(cpu)))
raise_softirq(SCHED_SOFTIRQ);
+#ifdef CONFIG_NO_HZ
+ else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
+ nohz_balancer_kick(cpu);
+#endif
}
static void rq_online_fair(struct rq *rq)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8afb953e31c..d10c80ebb67 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1663,9 +1663,6 @@ static void watchdog(struct rq *rq, struct task_struct *p)
{
unsigned long soft, hard;
- if (!p->signal)
- return;
-
/* max may change after cur was read, this will be fixed next tick */
soft = task_rlimit(p, RLIMIT_RTTIME);
hard = task_rlimit_max(p, RLIMIT_RTTIME);
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 32d2bd4061b..25c2f962f6f 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -295,13 +295,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
static inline void account_group_user_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct thread_group_cputimer *cputimer;
-
- /* tsk == current, ensure it is safe to use ->signal */
- if (unlikely(tsk->exit_state))
- return;
-
- cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
if (!cputimer->running)
return;
@@ -325,13 +319,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
static inline void account_group_system_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct thread_group_cputimer *cputimer;
-
- /* tsk == current, ensure it is safe to use ->signal */
- if (unlikely(tsk->exit_state))
- return;
-
- cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
if (!cputimer->running)
return;
@@ -355,16 +343,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
static inline void account_group_exec_runtime(struct task_struct *tsk,
unsigned long long ns)
{
- struct thread_group_cputimer *cputimer;
- struct signal_struct *sig;
-
- sig = tsk->signal;
- /* see __exit_signal()->task_rq_unlock_wait() */
- barrier();
- if (unlikely(!sig))
- return;
-
- cputimer = &sig->cputimer;
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
if (!cputimer->running)
return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f898af60817..021d2f878f1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -405,13 +405,7 @@ void tick_nohz_stop_sched_tick(int inidle)
* the scheduler tick in nohz_restart_sched_tick.
*/
if (!ts->tick_stopped) {
- if (select_nohz_load_balancer(1)) {
- /*
- * sched tick not stopped!
- */
- cpumask_clear_cpu(cpu, nohz_cpu_mask);
- goto out;
- }
+ select_nohz_load_balancer(1);
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
diff --git a/kernel/timer.c b/kernel/timer.c
index ee305c8d4e1..48d6aec0789 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -679,12 +679,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- int preferred_cpu = get_nohz_load_balancer();
-
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+ cpu = get_nohz_timer_target();
#endif
new_base = per_cpu(tvec_bases, cpu);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1..1723e2b8c58 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
*/
u64 notrace trace_clock(void)
{
- return cpu_clock(raw_smp_processor_id());
+ return local_clock();
}
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
new file mode 100644
index 00000000000..af040babb74
--- /dev/null
+++ b/kernel/workqueue_sched.h
@@ -0,0 +1,16 @@
+/*
+ * kernel/workqueue_sched.h
+ *
+ * Scheduler hooks for concurrency managed workqueue. Only to be
+ * included from sched.c and workqueue.c.
+ */
+static inline void wq_worker_waking_up(struct task_struct *task,
+ unsigned int cpu)
+{
+}
+
+static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
+ unsigned int cpu)
+{
+ return NULL;
+}
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc5056..142c84a5499 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b941a..9bd339eb04c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
@@ -3659,6 +3662,11 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
ptr = phys_to_virt(addr);
memset(ptr, 0, size);
reserve_early_without_check(addr, addr + size, "BOOTMEM");
+ /*
+ * The min_count is set to 0 so that bootmem allocated blocks
+ * are never reported as leaks.
+ */
+ kmemleak_alloc(ptr, size, 0, 0);
return ptr;
}
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6c0081441a3..5bffada7cde 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -9,6 +9,7 @@
#include <linux/vmalloc.h>
#include <linux/cgroup.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>
static void __meminit
__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -126,6 +127,12 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
if (!base)
base = vmalloc(table_size);
}
+ /*
+ * The value stored in section->page_cgroup is (base - pfn)
+ * and it does not point to the memory block allocated above,
+ * causing kmemleak false positives.
+ */
+ kmemleak_not_leak(base);
} else {
/*
* We don't have to allocate page_cgroup again, but
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a..b94fe1b3da4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
unsigned long total_scan;
- unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
+ unsigned long max_pass;
+ max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
delta = (4 * scanned) / shrinker->seeks;
delta *= max_pass;
do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
int shrink_ret;
int nr_before;
- nr_before = (*shrinker->shrink)(0, gfp_mask);
- shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
+ nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
+ shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
+ gfp_mask);
if (shrink_ret == -1)
break;
if (shrink_ret < nr_before)
@@ -296,7 +298,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
- lock_page(page);
+ lock_page_nosync(page);
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);
unlock_page(page);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b10e3cdb08f..800b6b9fbba 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -358,6 +358,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
acl->sec_level = sec_level;
acl->auth_type = auth_type;
hci_acl_connect(acl);
+ } else {
+ if (acl->sec_level < sec_level)
+ acl->sec_level = sec_level;
+ if (acl->auth_type < auth_type)
+ acl->auth_type = auth_type;
}
if (type == ACL_LINK)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6c57fc71c7e..786b5de0bac 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1049,6 +1049,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (conn) {
if (!ev->status)
conn->link_mode |= HCI_LM_AUTH;
+ else
+ conn->sec_level = BT_SECURITY_LOW;
clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1b682a5aa06..cf3c4073a8a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -401,6 +401,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
l2cap_send_sframe(pi, control);
}
+static inline int __l2cap_no_conn_pending(struct sock *sk)
+{
+ return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
+}
+
static void l2cap_do_start(struct sock *sk)
{
struct l2cap_conn *conn = l2cap_pi(sk)->conn;
@@ -409,12 +414,13 @@ static void l2cap_do_start(struct sock *sk)
if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
return;
- if (l2cap_check_security(sk)) {
+ if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
struct l2cap_conn_req req;
req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+ l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_REQ, sizeof(req), &req);
@@ -464,12 +470,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
}
if (sk->sk_state == BT_CONNECT) {
- if (l2cap_check_security(sk)) {
+ if (l2cap_check_security(sk) &&
+ __l2cap_no_conn_pending(sk)) {
struct l2cap_conn_req req;
req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+ l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_REQ, sizeof(req), &req);
@@ -2912,7 +2920,6 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
l2cap_pi(sk)->ident = 0;
l2cap_pi(sk)->dcid = dcid;
l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
-
l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
@@ -4404,6 +4411,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
req.psm = l2cap_pi(sk)->psm;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+ l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_REQ, sizeof(req), &req);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820..753fc4221f3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -217,14 +217,6 @@ static bool br_devices_support_netpoll(struct net_bridge *br)
return count != 0 && ret;
}
-static void br_poll_controller(struct net_device *br_dev)
-{
- struct netpoll *np = br_dev->npinfo->netpoll;
-
- if (np->real_dev != br_dev)
- netpoll_poll_dev(np->real_dev);
-}
-
void br_netpoll_cleanup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -295,7 +287,6 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_do_ioctl = br_dev_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_cleanup = br_netpoll_cleanup,
- .ndo_poll_controller = br_poll_controller,
#endif
};
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a4e72a89e4f..595da45f908 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
kfree_skb(skb);
else {
skb_push(skb, ETH_HLEN);
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
- if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
- netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
- skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
- } else
-#endif
- dev_queue_xmit(skb);
+ dev_queue_xmit(skb);
}
}
@@ -73,23 +66,9 @@ int br_forward_finish(struct sk_buff *skb)
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
-#ifdef CONFIG_NET_POLL_CONTROLLER
- struct net_bridge *br = to->br;
- if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
- struct netpoll *np;
- to->dev->npinfo = skb->dev->npinfo;
- np = skb->dev->npinfo->netpoll;
- np->real_dev = np->dev = to->dev;
- to->dev->priv_flags |= IFF_IN_NETPOLL;
- }
-#endif
skb->dev = to->dev;
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
br_forward_finish);
-#ifdef CONFIG_NET_POLL_CONTROLLER
- if (skb->dev->npinfo)
- skb->dev->npinfo->netpoll->dev = br->dev;
-#endif
}
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index 723a34710ad..0ea10f849be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1911,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb)
*/
static inline void skb_orphan_try(struct sk_buff *skb)
{
- if (!skb_tx(skb)->flags)
+ struct sock *sk = skb->sk;
+
+ if (sk && !skb_tx(skb)->flags) {
+ /* skb_tx_hash() won't be able to get sk.
+ * We copy sk_hash into skb->rxhash
+ */
+ if (!skb->rxhash)
+ skb->rxhash = sk->sk_hash;
skb_orphan(skb);
+ }
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1998,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
- hash = (__force u16) skb->protocol;
-
+ hash = (__force u16) skb->protocol ^ skb->rxhash;
hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
@@ -2022,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
- u16 queue_index;
+ int queue_index;
struct sock *sk = skb->sk;
- if (sk_tx_queue_recorded(sk)) {
- queue_index = sk_tx_queue_get(sk);
- } else {
+ queue_index = sk_tx_queue_get(sk);
+ if (queue_index < 0) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue) {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6ba1c0eece0..a4e0a7482c2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
- = neigh->dev->header_ops->cache_update;
+ = NULL;
+
+ if (neigh->dev->header_ops)
+ update = neigh->dev->header_ops->cache_update;
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c51b55400dc..11201784d29 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
menuconfig NET_DSA
bool "Distributed Switch Architecture support"
default n
- depends on EXPERIMENTAL && !S390
+ depends on EXPERIMENTAL && NET_ETHERNET && !S390
select PHYLIB
---help---
This allows you to use hardware switch chips that use
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 757f25eb9b4..7f6273506ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
int err;
err = ipmr_fib_lookup(net, &fl, &mrt);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb)
goto dont_forward;
err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
if (!local) {
if (IPCB(skb)->opt.router_alert) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6596b4feedd..65afeaec15b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
ssize_t spliced;
int ret;
+ sock_rps_record_flow(sk);
/*
* We can't seek on a socket input
*/
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201..7ed9dc1042d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
int mib_idx;
int fwd_rexmitting = 0;
+ if (!tp->packets_out)
+ return;
+
if (!tp->lost_out)
tp->retransmit_high = tp->snd_una;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 2794b600283..d6e9599d070 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type =
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
int err = rt2->rt_hdr.nexthdr;
spin_lock(&x->lock);
- if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+ if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
!ipv6_addr_any((struct in6_addr *)x->coaddr))
err = -ENOENT;
spin_unlock(&x->lock);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 94d72e85a47..b2a3ae6cad7 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
newsk = NULL;
goto out;
}
+ kfree_skb(oskb);
sock_hold(sk);
pep_sk(newsk)->listener = sk;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 570949417f3..724553e8ed7 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
{
struct icmphdr *icmph;
- if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+ if (!pskb_may_pull(skb, ihl + sizeof(*icmph)))
goto drop;
icmph = (void *)(skb_network_header(skb) + ihl);
@@ -215,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
(icmph->type != ICMP_PARAMETERPROB))
break;
+ if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+ goto drop;
+
iph = (void *)(icmph + 1);
if (egress)
addr = iph->daddr;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 73affb8624f..8dc47f1d000 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -267,7 +267,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
* Run memory cache shrinker.
*/
static int
-rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free);
int res;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index af1c173be4a..a7ec5a8a238 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
- if (err < 0) {
- if (err != -EAGAIN)
+ if (err <= 0) {
+ if (err != 0 && err != -EAGAIN)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
return ERR_PTR(err);
}
@@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
goto make_dummy_bundle;
dst_hold(&xdst->u.dst);
return oldflo;
+ } else if (new_xdst == NULL) {
+ num_xfrms = 0;
+ if (oldflo == NULL)
+ goto make_dummy_bundle;
+ xdst->num_xfrms = 0;
+ dst_hold(&xdst->u.dst);
+ return oldflo;
}
/* Kill the previous bundle */
@@ -1760,6 +1767,10 @@ restart:
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
goto dropdst;
+ } else if (xdst == NULL) {
+ num_xfrms = 0;
+ drop_pols = num_pols;
+ goto no_transform;
}
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 31ac5538fe7..5da30eb6ad0 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -83,8 +83,8 @@ config SND_SOC_ALL_CODECS
config SND_SOC_WM_HUBS
tristate
- default y if SND_SOC_WM8993=y
- default m if SND_SOC_WM8993=m
+ default y if SND_SOC_WM8993=y || SND_SOC_WM8994=y
+ default m if SND_SOC_WM8993=m || SND_SOC_WM8994=m
config SND_SOC_AC97_CODEC
tristate
diff --git a/sound/soc/codecs/wm8727.c b/sound/soc/codecs/wm8727.c
index 1072621e93f..9d1df262813 100644
--- a/sound/soc/codecs/wm8727.c
+++ b/sound/soc/codecs/wm8727.c
@@ -127,6 +127,8 @@ static __devinit int wm8727_platform_probe(struct platform_device *pdev)
goto err_codec;
}
+ return 0;
+
err_codec:
snd_soc_unregister_codec(codec);
err:
diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c
index 7e4a627b4c7..4e212ed62ea 100644
--- a/sound/soc/codecs/wm8776.c
+++ b/sound/soc/codecs/wm8776.c
@@ -94,7 +94,6 @@ SOC_DAPM_SINGLE("Bypass Switch", WM8776_OUTMUX, 2, 1, 0),
static const struct snd_soc_dapm_widget wm8776_dapm_widgets[] = {
SND_SOC_DAPM_INPUT("AUX"),
-SND_SOC_DAPM_INPUT("AUX"),
SND_SOC_DAPM_INPUT("AIN1"),
SND_SOC_DAPM_INPUT("AIN2"),
diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c
index 0417dae32e6..19ad590ca0b 100644
--- a/sound/soc/codecs/wm8988.c
+++ b/sound/soc/codecs/wm8988.c
@@ -885,7 +885,6 @@ static int wm8988_register(struct wm8988_priv *wm8988,
ret = snd_soc_register_dai(&wm8988_dai);
if (ret != 0) {
dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
- snd_soc_unregister_codec(codec);
goto err_codec;
}
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index 3396a0db06b..ec4acac49eb 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -683,20 +683,15 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
/* clock inversion (CKG2) */
data = 0;
- switch (SH_FSI_INVERSION_MASK & flags) {
- case SH_FSI_LRM_INV:
- data = 1 << 12;
- break;
- case SH_FSI_BRM_INV:
- data = 1 << 8;
- break;
- case SH_FSI_LRS_INV:
- data = 1 << 4;
- break;
- case SH_FSI_BRS_INV:
- data = 1 << 0;
- break;
- }
+ if (SH_FSI_LRM_INV & flags)
+ data |= 1 << 12;
+ if (SH_FSI_BRM_INV & flags)
+ data |= 1 << 8;
+ if (SH_FSI_LRS_INV & flags)
+ data |= 1 << 4;
+ if (SH_FSI_BRS_INV & flags)
+ data |= 1 << 0;
+
fsi_reg_write(fsi, CKG2, data);
/* do fmt, di fmt */
@@ -726,15 +721,15 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
break;
case SH_FSI_FMT_TDM:
msg = "TDM";
- data = CR_FMT(CR_TDM) | (fsi->chan - 1);
fsi->chan = is_play ?
SH_FSI_GET_CH_O(flags) : SH_FSI_GET_CH_I(flags);
+ data = CR_FMT(CR_TDM) | (fsi->chan - 1);
break;
case SH_FSI_FMT_TDM_DELAY:
msg = "TDM Delay";
- data = CR_FMT(CR_TDM_D) | (fsi->chan - 1);
fsi->chan = is_play ?
SH_FSI_GET_CH_O(flags) : SH_FSI_GET_CH_I(flags);
+ data = CR_FMT(CR_TDM_D) | (fsi->chan - 1);
break;
default:
dev_err(dai->dev, "unknown format.\n");
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
new file mode 100644
index 00000000000..15130b50dfe
--- /dev/null
+++ b/tools/perf/arch/sparc/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
new file mode 100644
index 00000000000..0ab88483720
--- /dev/null
+++ b/tools/perf/arch/sparc/util/dwarf-regs.c
@@ -0,0 +1,43 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <libio.h>
+#include <dwarf-regs.h>
+
+#define SPARC_MAX_REGS 96
+
+const char *sparc_regs_table[SPARC_MAX_REGS] = {
+ "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+
+/**
+ * get_arch_regstr() - lookup register name from its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name stored in sparc_regs_table for the
+ * given DWARF register number. If @n is past the end of the table, or the
+ * table holds no name for it (unlisted slots are NULL), this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
+}