diff options
581 files changed, 8174 insertions, 4906 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host new file mode 100644 index 00000000000..29a4f892e43 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-scsi_host @@ -0,0 +1,13 @@ +What: /sys/class/scsi_host/hostX/isci_id +Date: June 2011 +Contact: Dave Jiang <dave.jiang@intel.com> +Description: + This file contains the enumerated host ID for the Intel + SCU controller. The Intel(R) C600 Series Chipset SATA/SAS + Storage Control Unit embeds up to two 4-port controllers in + a single PCI device. The controllers are enumerated in order + which usually means the lowest number scsi_host corresponds + with the first controller, but this association is not + guaranteed. The 'isci_id' attribute unambiguously identifies + the controller index: '0' for the first controller, + '1' for the second. diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 6f3c598971f..06eb6d957c8 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -380,7 +380,7 @@ will be charged as a new owner of it. 5.2 stat file -5.2.1 memory.stat file includes following statistics +memory.stat file includes following statistics # per-memory cgroup local status cache - # of bytes of page cache memory. @@ -438,89 +438,6 @@ Note: file_mapped is accounted only when the memory cgroup is owner of page cache.) -5.2.2 memory.vmscan_stat - -memory.vmscan_stat includes statistics information for memory scanning and -freeing, reclaiming. The statistics shows memory scanning information since -memory cgroup creation and can be reset to 0 by writing 0 as - - #echo 0 > ../memory.vmscan_stat - -This file contains following statistics. - -[param]_[file_or_anon]_pages_by_[reason]_[under_heararchy] -[param]_elapsed_ns_by_[reason]_[under_hierarchy] - -For example, - - scanned_file_pages_by_limit indicates the number of scanned - file pages at vmscan. 
- -Now, 3 parameters are supported - - scanned - the number of pages scanned by vmscan - rotated - the number of pages activated at vmscan - freed - the number of pages freed by vmscan - -If "rotated" is high against scanned/freed, the memcg seems busy. - -Now, 2 reason are supported - - limit - the memory cgroup's limit - system - global memory pressure + softlimit - (global memory pressure not under softlimit is not handled now) - -When under_hierarchy is added in the tail, the number indicates the -total memcg scan of its children and itself. - -elapsed_ns is a elapsed time in nanosecond. This may include sleep time -and not indicates CPU usage. So, please take this as just showing -latency. - -Here is an example. - -# cat /cgroup/memory/A/memory.vmscan_stat -scanned_pages_by_limit 9471864 -scanned_anon_pages_by_limit 6640629 -scanned_file_pages_by_limit 2831235 -rotated_pages_by_limit 4243974 -rotated_anon_pages_by_limit 3971968 -rotated_file_pages_by_limit 272006 -freed_pages_by_limit 2318492 -freed_anon_pages_by_limit 962052 -freed_file_pages_by_limit 1356440 -elapsed_ns_by_limit 351386416101 -scanned_pages_by_system 0 -scanned_anon_pages_by_system 0 -scanned_file_pages_by_system 0 -rotated_pages_by_system 0 -rotated_anon_pages_by_system 0 -rotated_file_pages_by_system 0 -freed_pages_by_system 0 -freed_anon_pages_by_system 0 -freed_file_pages_by_system 0 -elapsed_ns_by_system 0 -scanned_pages_by_limit_under_hierarchy 9471864 -scanned_anon_pages_by_limit_under_hierarchy 6640629 -scanned_file_pages_by_limit_under_hierarchy 2831235 -rotated_pages_by_limit_under_hierarchy 4243974 -rotated_anon_pages_by_limit_under_hierarchy 3971968 -rotated_file_pages_by_limit_under_hierarchy 272006 -freed_pages_by_limit_under_hierarchy 2318492 -freed_anon_pages_by_limit_under_hierarchy 962052 -freed_file_pages_by_limit_under_hierarchy 1356440 -elapsed_ns_by_limit_under_hierarchy 351386416101 -scanned_pages_by_system_under_hierarchy 0 -scanned_anon_pages_by_system_under_hierarchy 
0 -scanned_file_pages_by_system_under_hierarchy 0 -rotated_pages_by_system_under_hierarchy 0 -rotated_anon_pages_by_system_under_hierarchy 0 -rotated_file_pages_by_system_under_hierarchy 0 -freed_pages_by_system_under_hierarchy 0 -freed_anon_pages_by_system_under_hierarchy 0 -freed_file_pages_by_system_under_hierarchy 0 -elapsed_ns_by_system_under_hierarchy 0 - 5.3 swappiness Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp index fa8776ab9b1..84d46c0c71a 100644 --- a/Documentation/hwmon/coretemp +++ b/Documentation/hwmon/coretemp @@ -35,13 +35,6 @@ the Out-Of-Spec bit. Following table summarizes the exported sysfs files: All Sysfs entries are named with their core_id (represented here by 'X'). tempX_input - Core temperature (in millidegrees Celsius). tempX_max - All cooling devices should be turned on (on Core2). - Initialized with IA32_THERM_INTERRUPT. When the CPU - temperature reaches this temperature, an interrupt is - generated and tempX_max_alarm is set. -tempX_max_hyst - If the CPU temperature falls below than temperature, - an interrupt is generated and tempX_max_alarm is reset. -tempX_max_alarm - Set if the temperature reaches or exceeds tempX_max. - Reset if the temperature drops to or below tempX_max_hyst. tempX_crit - Maximum junction temperature (in millidegrees Celsius). tempX_crit_alarm - Set when Out-of-spec bit is set, never clears. Correct CPU operation is no longer guaranteed. @@ -49,9 +42,10 @@ tempX_label - Contains string "Core X", where X is processor number. For Package temp, this will be "Physical id Y", where Y is the package number. -The TjMax temperature is set to 85 degrees C if undocumented model specific -register (UMSR) 0xee has bit 30 set. If not the TjMax is 100 degrees C as -(sometimes) documented in processor datasheet. +On CPU models which support it, TjMax is read from a model-specific register. 
+On other models, it is set to an arbitrary value based on weak heuristics. +If these heuristics don't work for you, you can pass the correct TjMax value +as a module parameter (tjmax). Appendix A. Known TjMax lists (TBD): Some information comes from ark.intel.com diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 614d0382e2c..d6e6724446c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2086,9 +2086,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 - pnp.debug [PNP] - Enable PNP debug messages. This depends on the - CONFIG_PNP_DEBUG_MESSAGES option. + pnp.debug=1 [PNP] + Enable PNP debug messages (depends on the + CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time + via /sys/module/pnp/parameters/debug. We always show + current resource usage; turning this on also shows + possible settings and some assignment information. pnpacpi= [ACPI] { off } @@ -2703,10 +2706,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. functions are at fixed addresses, they make nice targets for exploits that can control RIP. - emulate [default] Vsyscalls turn into traps and are - emulated reasonably safely. + emulate Vsyscalls turn into traps and are emulated + reasonably safely. - native Vsyscalls are native syscall instructions. + native [default] Vsyscalls are native syscall + instructions. This is a little bit faster than trapping and makes a few dynamic recompilers work better than they would in emulation mode. diff --git a/Documentation/networking/dmfe.txt b/Documentation/networking/dmfe.txt index 8006c227fda..25320bf19c8 100644 --- a/Documentation/networking/dmfe.txt +++ b/Documentation/networking/dmfe.txt @@ -1,3 +1,5 @@ +Note: This driver doesn't have a maintainer. + Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux. 
This program is free software; you can redistribute it and/or @@ -55,7 +57,6 @@ Test and make sure PCI latency is now correct for all cases. Authors: Sten Wang <sten_wang@davicom.com.tw > : Original Author -Tobias Ringstrom <tori@unhappy.mine.nu> : Current Maintainer Contributors: diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 81546990f41..ca5cdcd0f0e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1042,7 +1042,7 @@ conf/interface/*: The functional behaviour for certain settings is different depending on whether local forwarding is enabled or not. -accept_ra - BOOLEAN +accept_ra - INTEGER Accept Router Advertisements; autoconfigure using them. Possible values are: @@ -1106,7 +1106,7 @@ dad_transmits - INTEGER The amount of Duplicate Address Detection probes to send. Default: 1 -forwarding - BOOLEAN +forwarding - INTEGER Configure interface-specific Host/Router behaviour. Note: It is recommended to have the same setting on all diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index 58fd7414e6c..fe67b5c79f0 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -27,7 +27,7 @@ applying a filter to each packet that assigns it to one of a small number of logical flows. Packets for each flow are steered to a separate receive queue, which in turn can be processed by separate CPUs. This mechanism is generally known as “Receive-side Scaling” (RSS). The goal of RSS and -the other scaling techniques to increase performance uniformly. +the other scaling techniques is to increase performance uniformly. Multi-queue distribution can also be used for traffic prioritization, but that is not the focus of these techniques. @@ -186,10 +186,10 @@ are steered using plain RPS. Multiple table entries may point to the same CPU. 
Indeed, with many flows and few CPUs, it is very likely that a single application thread handles flows with many different flow hashes. -rps_sock_table is a global flow table that contains the *desired* CPU for -flows: the CPU that is currently processing the flow in userspace. Each -table value is a CPU index that is updated during calls to recvmsg and -sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage() +rps_sock_flow_table is a global flow table that contains the *desired* CPU +for flows: the CPU that is currently processing the flow in userspace. +Each table value is a CPU index that is updated during calls to recvmsg +and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage() and tcp_splice_read()). When the scheduler moves a thread to a new CPU while it has outstanding @@ -243,7 +243,7 @@ configured. The number of entries in the global flow table is set through: The number of entries in the per-queue flow table are set through: - /sys/class/net/<dev>/queues/tx-<n>/rps_flow_cnt + /sys/class/net/<dev>/queues/rx-<n>/rps_flow_cnt == Suggested Configuration diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 0924aaca330..29bdf62aac0 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -123,10 +123,11 @@ be automatically shutdown if it's set to "never". khugepaged runs usually at low frequency so while one may not want to invoke defrag algorithms synchronously during the page faults, it should be worth invoking defrag at least in khugepaged. 
However it's -also possible to disable defrag in khugepaged: +also possible to disable defrag in khugepaged by writing 0 or enable +defrag in khugepaged by writing 1: -echo yes >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag -echo no >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag +echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag +echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag You can also control how many pages khugepaged should scan at each pass: diff --git a/MAINTAINERS b/MAINTAINERS index 28f65c249b9..6185d051358 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1278,7 +1278,6 @@ F: drivers/input/misc/ati_remote2.c ATLX ETHERNET DRIVERS M: Jay Cliburn <jcliburn@gmail.com> M: Chris Snook <chris.snook@gmail.com> -M: Jie Yang <jie.yang@atheros.com> L: netdev@vger.kernel.org W: http://sourceforge.net/projects/atl1 W: http://atl1.sourceforge.net @@ -1574,7 +1573,6 @@ F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER M: Rasesh Mody <rmody@brocade.com> -M: Debashis Dutt <ddutt@brocade.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/bna/ @@ -1758,7 +1756,6 @@ F: Documentation/zh_CN/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti <benve@cisco.com> -M: Vasanthy Kolluri <vkolluri@cisco.com> M: Roopa Prabhu <roprabhu@cisco.com> M: David Wang <dwang2@cisco.com> S: Supported @@ -2463,7 +2460,7 @@ S: Supported F: drivers/infiniband/hw/ehca/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Breno Leitao <leitao@linux.vnet.ibm.com> +M: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/ehea/ @@ -3262,6 +3259,17 @@ F: Documentation/input/multi-touch-protocol.txt F: drivers/input/input-mt.c K: \b(ABS|SYN)_MT_ +INTEL C600 SERIES SAS CONTROLLER DRIVER +M: Intel SCU Linux support <intel-linux-scu@intel.com> +M: Dan Williams <dan.j.williams@intel.com> +M: Dave Jiang <dave.jiang@intel.com> +M: Ed Nadolski <edmund.nadolski@intel.com> +L: 
linux-scsi@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git +S: Maintained +F: drivers/scsi/isci/ +F: firmware/isci/ + INTEL IDLE DRIVER M: Len Brown <lenb@kernel.org> L: linux-pm@lists.linux-foundation.org @@ -3305,7 +3313,7 @@ M: David Woodhouse <dwmw2@infradead.org> L: iommu@lists.linux-foundation.org T: git git://git.infradead.org/iommu-2.6.git S: Supported -F: drivers/pci/intel-iommu.c +F: drivers/iommu/intel-iommu.c F: include/linux/intel-iommu.h INTEL IOP-ADMA DMA DRIVER @@ -4404,7 +4412,8 @@ L: netfilter@vger.kernel.org L: coreteam@netfilter.org W: http://www.netfilter.org/ W: http://www.iptables.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next-2.6.git S: Supported F: include/linux/netfilter* F: include/linux/netfilter/ @@ -4774,7 +4783,7 @@ F: drivers/net/wireless/orinoco/ OSD LIBRARY and FILESYSTEM M: Boaz Harrosh <bharrosh@panasas.com> -M: Benny Halevy <bhalevy@panasas.com> +M: Benny Halevy <bhalevy@tonian.com> L: osd-dev@open-osd.org W: http://open-osd.org T: git git://git.open-osd.org/open-osd.git @@ -6357,15 +6366,14 @@ F: net/ipv4/tcp_lp.c TEGRA SUPPORT M: Colin Cross <ccross@android.com> -M: Erik Gilling <konkers@android.com> M: Olof Johansson <olof@lixom.net> +M: Stephen Warren <swarren@nvidia.com> L: linux-tegra@vger.kernel.org -T: git git://android.git.kernel.org/kernel/tegra.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/olof/tegra.git S: Supported F: arch/arm/mach-tegra TEHUTI ETHERNET DRIVER -M: Alexander Indenbaum <baum@tehutinetworks.net> M: Andy Gospodarek <andy@greyhouse.net> L: netdev@vger.kernel.org S: Supported @@ -7200,6 +7208,9 @@ W: http://opensource.wolfsonmicro.com/content/linux-drivers-wolfson-devices S: Supported F: Documentation/hwmon/wm83?? 
F: drivers/leds/leds-wm83*.c +F: drivers/input/misc/wm831x-on.c +F: drivers/input/touchscreen/wm831x-ts.c +F: drivers/input/touchscreen/wm97*.c F: drivers/mfd/wm8*.c F: drivers/power/wm83*.c F: drivers/rtc/rtc-wm83*.c @@ -7209,6 +7220,7 @@ F: drivers/watchdog/wm83*_wdt.c F: include/linux/mfd/wm831x/ F: include/linux/mfd/wm8350/ F: include/linux/mfd/wm8400* +F: include/linux/wm97xx.h F: include/sound/wm????.h F: sound/soc/codecs/wm* @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = "Divemaster Edition" # *DOCUMENTATION* diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 60cde53d266..8bb936226de 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -51,7 +51,7 @@ config GENERIC_CMOS_UPDATE def_bool y config GENERIC_GPIO - def_bool y + bool config ZONE_DMA bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3269576dbfa..3146ed3f6ec 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1283,6 +1283,20 @@ config ARM_ERRATA_364296 processor into full low interrupt latency mode. ARM11MPCore is not affected. +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. 
+ endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/boot/dts/tegra-harmony.dts b/arch/arm/boot/dts/tegra-harmony.dts index 4c053340ce3..e5818668d09 100644 --- a/arch/arm/boot/dts/tegra-harmony.dts +++ b/arch/arm/boot/dts/tegra-harmony.dts @@ -57,14 +57,14 @@ }; sdhci@c8000200 { - gpios = <&gpio 69 0>, /* cd, gpio PI5 */ - <&gpio 57 0>, /* wp, gpio PH1 */ - <&gpio 155 0>; /* power, gpio PT3 */ + cd-gpios = <&gpio 69 0>; /* gpio PI5 */ + wp-gpios = <&gpio 57 0>; /* gpio PH1 */ + power-gpios = <&gpio 155 0>; /* gpio PT3 */ }; sdhci@c8000600 { - gpios = <&gpio 58 0>, /* cd, gpio PH2 */ - <&gpio 59 0>, /* wp, gpio PH3 */ - <&gpio 70 0>; /* power, gpio PI6 */ + cd-gpios = <&gpio 58 0>; /* gpio PH2 */ + wp-gpios = <&gpio 59 0>; /* gpio PH3 */ + power-gpios = <&gpio 70 0>; /* gpio PI6 */ }; }; diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index 1940cae0074..64cedca6fc7 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -21,8 +21,8 @@ }; sdhci@c8000400 { - gpios = <&gpio 69 0>, /* cd, gpio PI5 */ - <&gpio 57 0>, /* wp, gpio PH1 */ - <&gpio 70 0>; /* power, gpio PI6 */ + cd-gpios = <&gpio 69 0>; /* gpio PI5 */ + wp-gpios = <&gpio 57 0>; /* gpio PH1 */ + power-gpios = <&gpio 70 0>; /* gpio PI6 */ }; }; diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index 7aa4262ada7..197f81c7735 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -259,7 +259,6 @@ static void __init vic_disable(void __iomem *base) writel(0, base + VIC_INT_SELECT); writel(0, base + VIC_INT_ENABLE); writel(~0, base + VIC_INT_ENABLE_CLEAR); - writel(0, base + VIC_IRQ_STATUS); writel(0, base + VIC_ITCR); writel(~0, base + VIC_INT_SOFT_CLEAR); } diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 8c73900da9e..253cc86318b 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -25,17 +25,17 @@ #ifdef CONFIG_SMP -#define 
__futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ smp_mb(); \ __asm__ __volatile__( \ - "1: ldrex %1, [%2]\n" \ + "1: ldrex %1, [%3]\n" \ " " insn "\n" \ - "2: strex %1, %0, [%2]\n" \ - " teq %1, #0\n" \ + "2: strex %2, %0, [%3]\n" \ + " teq %2, #0\n" \ " bne 1b\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -73,14 +73,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include <linux/preempt.h> #include <asm/domain.h> -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%2]\n" \ + "1: " T(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%2]\n" \ + "2: " T(str) " %0, [%3]\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -117,7 +117,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; + int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -129,19 +129,19 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("mov %0, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("add %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %0, %1, %3", 
ret, oldval, uaddr, oparg); + __futex_atomic_op("orr %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %0, %1, %3", ret, oldval, uaddr, ~oparg); + __futex_atomic_op("and %0, %1, %4", ret, oldval, tmp, uaddr, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("eor %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; default: ret = -ENOSYS; diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h index 080d74f8128..ff66638ff54 100644 --- a/arch/arm/include/asm/localtimer.h +++ b/arch/arm/include/asm/localtimer.h @@ -10,6 +10,8 @@ #ifndef __ASM_ARM_LOCALTIMER_H #define __ASM_ARM_LOCALTIMER_H +#include <linux/errno.h> + struct clock_event_device; /* diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 2c04ed5efeb..c60a2944f95 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -478,8 +478,8 @@ /* * Unimplemented (or alternatively implemented) syscalls */ -#define __IGNORE_fadvise64_64 1 -#define __IGNORE_migrate_pages 1 +#define __IGNORE_fadvise64_64 +#define __IGNORE_migrate_pages #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4c851834f68..6be3e2e4d83 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -321,8 +321,8 @@ static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_DCACHE_REFILL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 79ed5e7f204..7fcddb75c87 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -13,6 +13,7 @@ #include <asm/smp_scu.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #define SCU_CTRL 0x00 #define SCU_CONFIG 0x04 @@ -37,6 +38,15 @@ void __init scu_enable(void __iomem *scu_base) { u32 scu_ctrl; +#ifdef CONFIG_ARM_ERRATA_764369 + /* Cortex-A9 only */ + if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) { + scu_ctrl = __raw_readl(scu_base + 0x30); + if (!(scu_ctrl & 1)) + __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + } +#endif + scu_ctrl = __raw_readl(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bf977f8514f..4e66f62b8d4 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -23,8 +23,10 @@ #if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK) #define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) #else #define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x #endif OUTPUT_ARCH(arm) @@ -39,6 +41,11 @@ jiffies = jiffies_64 + 4; SECTIONS { /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + * * unwind exit sections must be discarded before the rest of the * unwind sections get included. 
*/ @@ -47,6 +54,9 @@ SECTIONS *(.ARM.extab.exit.text) ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL #ifndef CONFIG_HOTPLUG *(.ARM.exidx.devexit.text) *(.ARM.extab.devexit.text) @@ -58,6 +68,8 @@ SECTIONS #ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) #endif + *(.discard) + *(.discard.*) } #ifdef CONFIG_XIP_KERNEL @@ -279,9 +291,6 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } - - /* Default discards */ - DISCARDS } /* diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 83dce859886..a9e0dae86a2 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -158,7 +158,7 @@ void __init dove_spi0_init(void) void __init dove_spi1_init(void) { - orion_spi_init(DOVE_SPI1_PHYS_BASE, get_tclk()); + orion_spi_1_init(DOVE_SPI1_PHYS_BASE, get_tclk()); } /***************************************************************************** diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c index 1561b036a9b..86964d2e9e1 100644 --- a/arch/arm/mach-exynos4/clock.c +++ b/arch/arm/mach-exynos4/clock.c @@ -899,8 +899,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLKDIV_CAM, .shift = 28, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "exynos4-fimc.0", + .name = "sclk_cam0", .enable = exynos4_clksrc_mask_cam_ctrl, .ctrlbit = (1 << 16), }, @@ -909,8 +908,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLKDIV_CAM, .shift = 16, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "exynos4-fimc.1", + .name = "sclk_cam1", .enable = exynos4_clksrc_mask_cam_ctrl, .ctrlbit = (1 << 20), }, @@ -1160,7 +1158,7 @@ void __init_or_cpufreq exynos4_setup_clocks(void) vpllsrc = clk_get_rate(&clk_vpllsrc.clk); vpll = s5p_get_pll46xx(vpllsrc, __raw_readl(S5P_VPLL_CON0), - __raw_readl(S5P_VPLL_CON1), pll_4650); + __raw_readl(S5P_VPLL_CON1), pll_4650c); clk_fout_apll.ops 
= &exynos4_fout_apll_ops; clk_fout_mpll.rate = mpll; diff --git a/arch/arm/mach-exynos4/mct.c b/arch/arm/mach-exynos4/mct.c index 1ae059b7ad7..ddd86864fb8 100644 --- a/arch/arm/mach-exynos4/mct.c +++ b/arch/arm/mach-exynos4/mct.c @@ -132,12 +132,18 @@ static cycle_t exynos4_frc_read(struct clocksource *cs) return ((cycle_t)hi << 32) | lo; } +static void exynos4_frc_resume(struct clocksource *cs) +{ + exynos4_mct_frc_start(0, 0); +} + struct clocksource mct_frc = { .name = "mct-frc", .rating = 400, .read = exynos4_frc_read, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = exynos4_frc_resume, }; static void __init exynos4_clocksource_init(void) @@ -389,9 +395,11 @@ static void exynos4_mct_tick_init(struct clock_event_device *evt) } /* Setup the local clock events for a CPU */ -void __cpuinit local_timer_setup(struct clock_event_device *evt) +int __cpuinit local_timer_setup(struct clock_event_device *evt) { exynos4_mct_tick_init(evt); + + return 0; } int local_timer_ack(void) diff --git a/arch/arm/mach-exynos4/platsmp.c b/arch/arm/mach-exynos4/platsmp.c index 7c2282c6ba8..df6ef1b2f98 100644 --- a/arch/arm/mach-exynos4/platsmp.c +++ b/arch/arm/mach-exynos4/platsmp.c @@ -106,6 +106,8 @@ void __cpuinit platform_secondary_init(unsigned int cpu) */ spin_lock(&boot_lock); spin_unlock(&boot_lock); + + set_cpu_online(cpu, true); } int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) diff --git a/arch/arm/mach-exynos4/setup-keypad.c b/arch/arm/mach-exynos4/setup-keypad.c index 1ee0ebff111..7862bfb5933 100644 --- a/arch/arm/mach-exynos4/setup-keypad.c +++ b/arch/arm/mach-exynos4/setup-keypad.c @@ -19,15 +19,16 @@ void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols) if (rows > 8) { /* Set all the necessary GPX2 pins: KP_ROW[0~7] */ - s3c_gpio_cfgrange_nopull(EXYNOS4_GPX2(0), 8, S3C_GPIO_SFN(3)); + s3c_gpio_cfgall_range(EXYNOS4_GPX2(0), 8, S3C_GPIO_SFN(3), + S3C_GPIO_PULL_UP); /* Set all the necessary GPX3 pins: 
KP_ROW[8~] */ - s3c_gpio_cfgrange_nopull(EXYNOS4_GPX3(0), (rows - 8), - S3C_GPIO_SFN(3)); + s3c_gpio_cfgall_range(EXYNOS4_GPX3(0), (rows - 8), + S3C_GPIO_SFN(3), S3C_GPIO_PULL_UP); } else { /* Set all the necessary GPX2 pins: KP_ROW[x] */ - s3c_gpio_cfgrange_nopull(EXYNOS4_GPX2(0), rows, - S3C_GPIO_SFN(3)); + s3c_gpio_cfgall_range(EXYNOS4_GPX2(0), rows, S3C_GPIO_SFN(3), + S3C_GPIO_PULL_UP); } /* Set all the necessary GPX1 pins to special-function 3: KP_COL[x] */ diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index fcf0ae95651..8cdc730dcb3 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -32,6 +32,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/mtd/physmap.h> +#include <video/vga.h> #include <mach/hardware.h> #include <mach/platform.h> @@ -154,6 +155,7 @@ static struct map_desc ap_io_desc[] __initdata = { static void __init ap_map_io(void) { iotable_init(ap_io_desc, ARRAY_SIZE(ap_io_desc)); + vga_base = PCI_MEMORY_VADDR; } #define INTEGRATOR_SC_VALID_INT 0x003fffff diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index dd56bfb351e..11b86e5b71c 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -27,7 +27,6 @@ #include <linux/spinlock.h> #include <linux/init.h> #include <linux/io.h> -#include <video/vga.h> #include <mach/hardware.h> #include <mach/platform.h> @@ -505,7 +504,6 @@ void __init pci_v3_preinit(void) pcibios_min_io = 0x6000; pcibios_min_mem = 0x00100000; - vga_base = PCI_MEMORY_VADDR; /* * Hook in our fault handler for PCI errors diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c index 2028464cf5b..f79b7d2a8ed 100644 --- a/arch/arm/mach-omap2/board-2430sdp.c +++ b/arch/arm/mach-omap2/board-2430sdp.c @@ -193,7 +193,8 @@ static int __init omap2430_i2c_init(void) { omap_register_i2c_bus(1, 100, sdp2430_i2c1_boardinfo, 
ARRAY_SIZE(sdp2430_i2c1_boardinfo)); - omap2_pmic_init("twl4030", &sdp2430_twldata); + omap_pmic_init(2, 100, "twl4030", INT_24XX_SYS_NIRQ, + &sdp2430_twldata); return 0; } diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c index a9b45c76e1d..097a42d81e5 100644 --- a/arch/arm/mach-omap2/hsmmc.c +++ b/arch/arm/mach-omap2/hsmmc.c @@ -137,8 +137,7 @@ static void omap4_hsmmc1_before_set_reg(struct device *dev, int slot, */ reg = omap4_ctrl_pad_readl(control_pbias_offset); reg &= ~(OMAP4_MMC1_PBIASLITE_PWRDNZ_MASK | - OMAP4_MMC1_PWRDNZ_MASK | - OMAP4_USBC1_ICUSB_PWRDNZ_MASK); + OMAP4_MMC1_PWRDNZ_MASK); omap4_ctrl_pad_writel(reg, control_pbias_offset); } @@ -156,8 +155,7 @@ static void omap4_hsmmc1_after_set_reg(struct device *dev, int slot, else reg |= OMAP4_MMC1_PBIASLITE_VMODE_MASK; reg |= (OMAP4_MMC1_PBIASLITE_PWRDNZ_MASK | - OMAP4_MMC1_PWRDNZ_MASK | - OMAP4_USBC1_ICUSB_PWRDNZ_MASK); + OMAP4_MMC1_PWRDNZ_MASK); omap4_ctrl_pad_writel(reg, control_pbias_offset); timeout = jiffies + msecs_to_jiffies(5); @@ -171,16 +169,14 @@ static void omap4_hsmmc1_after_set_reg(struct device *dev, int slot, if (reg & OMAP4_MMC1_PBIASLITE_VMODE_ERROR_MASK) { pr_err("Pbias Voltage is not same as LDO\n"); /* Caution : On VMODE_ERROR Power Down MMC IO */ - reg &= ~(OMAP4_MMC1_PWRDNZ_MASK | - OMAP4_USBC1_ICUSB_PWRDNZ_MASK); + reg &= ~(OMAP4_MMC1_PWRDNZ_MASK); omap4_ctrl_pad_writel(reg, control_pbias_offset); } } else { reg = omap4_ctrl_pad_readl(control_pbias_offset); reg |= (OMAP4_MMC1_PBIASLITE_PWRDNZ_MASK | OMAP4_MMC1_PWRDNZ_MASK | - OMAP4_MMC1_PBIASLITE_VMODE_MASK | - OMAP4_USBC1_ICUSB_PWRDNZ_MASK); + OMAP4_MMC1_PBIASLITE_VMODE_MASK); omap4_ctrl_pad_writel(reg, control_pbias_offset); } } diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c index a65145b02a5..19e4dac62a8 100644 --- a/arch/arm/mach-omap2/usb-musb.c +++ b/arch/arm/mach-omap2/usb-musb.c @@ -137,9 +137,6 @@ void __init usb_musb_init(struct omap_musb_board_data *musb_board_data) 
musb_plat.mode = board_data->mode; musb_plat.extvbus = board_data->extvbus; - if (cpu_is_omap44xx()) - omap4430_phy_init(dev); - if (cpu_is_omap3517() || cpu_is_omap3505()) { oh_name = "am35x_otg_hs"; name = "musb-am35x"; diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index f1d3bd8f6f1..343a540d86a 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -170,7 +170,9 @@ int __init s3c2410_init(void) { printk("S3C2410: Initialising architecture\n"); +#ifdef CONFIG_PM register_syscore_ops(&s3c2410_pm_syscore_ops); +#endif register_syscore_ops(&s3c24xx_irq_syscore_ops); return sysdev_register(&s3c2410_sysdev); diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c index ef0958d3e5c..57a1e01e4e5 100644 --- a/arch/arm/mach-s3c2412/s3c2412.c +++ b/arch/arm/mach-s3c2412/s3c2412.c @@ -245,7 +245,9 @@ int __init s3c2412_init(void) { printk("S3C2412: Initialising architecture\n"); +#ifdef CONFIG_PM register_syscore_ops(&s3c2412_pm_syscore_ops); +#endif register_syscore_ops(&s3c24xx_irq_syscore_ops); return sysdev_register(&s3c2412_sysdev); diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c index 494ce913dc9..20b3fdfb305 100644 --- a/arch/arm/mach-s3c2416/s3c2416.c +++ b/arch/arm/mach-s3c2416/s3c2416.c @@ -97,7 +97,9 @@ int __init s3c2416_init(void) s3c_fb_setname("s3c2443-fb"); +#ifdef CONFIG_PM register_syscore_ops(&s3c2416_pm_syscore_ops); +#endif register_syscore_ops(&s3c24xx_irq_syscore_ops); return sysdev_register(&s3c2416_sysdev); diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c index ce99ff72838..2270d336021 100644 --- a/arch/arm/mach-s3c2440/s3c2440.c +++ b/arch/arm/mach-s3c2440/s3c2440.c @@ -55,7 +55,9 @@ int __init s3c2440_init(void) /* register suspend/resume handlers */ +#ifdef CONFIG_PM register_syscore_ops(&s3c2410_pm_syscore_ops); +#endif register_syscore_ops(&s3c244x_pm_syscore_ops); 
register_syscore_ops(&s3c24xx_irq_syscore_ops); diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c index 9ad99f8016a..6f2b65e6e06 100644 --- a/arch/arm/mach-s3c2440/s3c2442.c +++ b/arch/arm/mach-s3c2440/s3c2442.c @@ -169,7 +169,9 @@ int __init s3c2442_init(void) { printk("S3C2442: Initialising architecture\n"); +#ifdef CONFIG_PM register_syscore_ops(&s3c2410_pm_syscore_ops); +#endif register_syscore_ops(&s3c244x_pm_syscore_ops); register_syscore_ops(&s3c24xx_irq_syscore_ops); diff --git a/arch/arm/mach-s3c2443/clock.c b/arch/arm/mach-s3c2443/clock.c index a1a7176675b..38058af4897 100644 --- a/arch/arm/mach-s3c2443/clock.c +++ b/arch/arm/mach-s3c2443/clock.c @@ -128,7 +128,7 @@ static int s3c2443_armclk_setrate(struct clk *clk, unsigned long rate) unsigned long clkcon0; clkcon0 = __raw_readl(S3C2443_CLKDIV0); - clkcon0 &= S3C2443_CLKDIV0_ARMDIV_MASK; + clkcon0 &= ~S3C2443_CLKDIV0_ARMDIV_MASK; clkcon0 |= val << S3C2443_CLKDIV0_ARMDIV_SHIFT; __raw_writel(clkcon0, S3C2443_CLKDIV0); } diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c index ecbea92bf83..a9f3183e029 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6410.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c @@ -262,45 +262,6 @@ static struct samsung_keypad_platdata smdk6410_keypad_data __initdata = { .cols = 8, }; -static int smdk6410_backlight_init(struct device *dev) -{ - int ret; - - ret = gpio_request(S3C64XX_GPF(15), "Backlight"); - if (ret) { - printk(KERN_ERR "failed to request GPF for PWM-OUT1\n"); - return ret; - } - - /* Configure GPIO pin with S3C64XX_GPF15_PWM_TOUT1 */ - s3c_gpio_cfgpin(S3C64XX_GPF(15), S3C_GPIO_SFN(2)); - - return 0; -} - -static void smdk6410_backlight_exit(struct device *dev) -{ - s3c_gpio_cfgpin(S3C64XX_GPF(15), S3C_GPIO_OUTPUT); - gpio_free(S3C64XX_GPF(15)); -} - -static struct platform_pwm_backlight_data smdk6410_backlight_data = { - .pwm_id = 1, - .max_brightness = 255, - .dft_brightness = 255, - .pwm_period_ns = 
78770, - .init = smdk6410_backlight_init, - .exit = smdk6410_backlight_exit, -}; - -static struct platform_device smdk6410_backlight_device = { - .name = "pwm-backlight", - .dev = { - .parent = &s3c_device_timer[1].dev, - .platform_data = &smdk6410_backlight_data, - }, -}; - static struct map_desc smdk6410_iodesc[] = {}; static struct platform_device *smdk6410_devices[] __initdata = { diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index 52a8e607bcc..f5f8fa89679 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -815,8 +815,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLK_DIV3, .shift = 20, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "s5pv210-fimc.0", + .name = "sclk_cam0", .enable = s5pv210_clk_mask0_ctrl, .ctrlbit = (1 << 3), }, @@ -825,8 +824,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLK_DIV1, .shift = 12, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "s5pv210-fimc.1", + .name = "sclk_cam1", .enable = s5pv210_clk_mask0_ctrl, .ctrlbit = (1 << 4), }, diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 0e1016a827a..0e0fd4d889b 100644 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -32,7 +32,6 @@ #include <asm/system.h> -#include <mach/hardware.h> #include <mach/clk.h> /* Frequency table index must be sequential starting at 0 */ diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 4210cb434db..a3e0c8692f0 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -6,6 +6,7 @@ config UX500_SOC_COMMON select ARM_GIC select HAS_MTU select ARM_ERRATA_753970 + select ARM_ERRATA_754322 menu "Ux500 SoC" diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 3b24bfa3b82..07c4bc8ea0a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -174,6 +174,10 @@ ENTRY(v7_coherent_user_range) dcache_line_size r2, 
r3 sub r3, r2, #1 bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification add r12, r12, r2 @@ -223,6 +227,10 @@ ENTRY(v7_flush_kern_dcache_area) add r1, r0, r1 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line add r0, r0, r2 @@ -247,6 +255,10 @@ v7_dma_inv_range: sub r3, r2, #1 tst r0, r3 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line tst r1, r3 @@ -270,6 +282,10 @@ v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c10, 1 @ clean D / U line add r0, r0, r2 @@ -288,6 +304,10 @@ ENTRY(v7_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line add r0, r0, r2 diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0a0a1e7c20d..c3ff82f92d9 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -324,6 +324,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); + else + __dma_free_buffer(page, size); return addr; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index cc7e2d8be9a..f8037ba338a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -496,6 +496,13 @@ static void __init free_unused_memmap(struct meminfo *mi) */ bank_start = min(bank_start, ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#else + /* + * Align down here since the VM subsystem insists that the + * memmap entries are valid from the bank start aligned to + * 
MAX_ORDER_NR_PAGES. + */ + bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES); #endif /* * If we had a previous bank, and there is a space diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c index f71078ef6bb..c65eb791d1b 100644 --- a/arch/arm/plat-s5p/irq-gpioint.c +++ b/arch/arm/plat-s5p/irq-gpioint.c @@ -114,17 +114,18 @@ static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip) { static int used_gpioint_groups = 0; int group = chip->group; - struct s5p_gpioint_bank *bank = NULL; + struct s5p_gpioint_bank *b, *bank = NULL; struct irq_chip_generic *gc; struct irq_chip_type *ct; if (used_gpioint_groups >= S5P_GPIOINT_GROUP_COUNT) return -ENOMEM; - list_for_each_entry(bank, &banks, list) { - if (group >= bank->start && - group < bank->start + bank->nr_groups) + list_for_each_entry(b, &banks, list) { + if (group >= b->start && group < b->start + b->nr_groups) { + bank = b; break; + } } if (!bank) return -EINVAL; @@ -162,9 +163,9 @@ static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip) ct->chip.irq_mask = irq_gc_mask_set_bit; ct->chip.irq_unmask = irq_gc_mask_clr_bit; ct->chip.irq_set_type = s5p_gpioint_set_type, - ct->regs.ack = PEND_OFFSET + REG_OFFSET(chip->group); - ct->regs.mask = MASK_OFFSET + REG_OFFSET(chip->group); - ct->regs.type = CON_OFFSET + REG_OFFSET(chip->group); + ct->regs.ack = PEND_OFFSET + REG_OFFSET(group - bank->start); + ct->regs.mask = MASK_OFFSET + REG_OFFSET(group - bank->start); + ct->regs.type = CON_OFFSET + REG_OFFSET(group - bank->start); irq_setup_generic_chip(gc, IRQ_MSK(chip->chip.ngpio), IRQ_GC_INIT_MASK_CACHE, IRQ_NOREQUEST | IRQ_NOPROBE, 0); diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 302c42670bd..3b4451979d1 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -64,6 +64,17 @@ static LIST_HEAD(clocks); */ DEFINE_SPINLOCK(clocks_lock); +/* Global watchdog clock used by arch_wtd_reset() callback */ +struct clk 
*s3c2410_wdtclk; +static int __init s3c_wdt_reset_init(void) +{ + s3c2410_wdtclk = clk_get(NULL, "watchdog"); + if (IS_ERR(s3c2410_wdtclk)) + printk(KERN_WARNING "%s: warning: cannot get watchdog clock\n", __func__); + return 0; +} +arch_initcall(s3c_wdt_reset_init); + /* enable and disable calls for use with the clk struct */ static int clk_null_enable(struct clk *clk, int enable) diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h index 87d5b38a86f..73c66d4d10f 100644 --- a/arch/arm/plat-samsung/include/plat/clock.h +++ b/arch/arm/plat-samsung/include/plat/clock.h @@ -9,6 +9,9 @@ * published by the Free Software Foundation. */ +#ifndef __ASM_PLAT_CLOCK_H +#define __ASM_PLAT_CLOCK_H __FILE__ + #include <linux/spinlock.h> #include <linux/clkdev.h> @@ -121,3 +124,8 @@ extern int s3c64xx_sclk_ctrl(struct clk *clk, int enable); extern void s3c_pwmclk_init(void); +/* Global watchdog clock used by arch_wtd_reset() callback */ + +extern struct clk *s3c2410_wdtclk; + +#endif /* __ASM_PLAT_CLOCK_H */ diff --git a/arch/arm/plat-samsung/include/plat/watchdog-reset.h b/arch/arm/plat-samsung/include/plat/watchdog-reset.h index 54b762acb5a..40dbb2b0ae2 100644 --- a/arch/arm/plat-samsung/include/plat/watchdog-reset.h +++ b/arch/arm/plat-samsung/include/plat/watchdog-reset.h @@ -10,6 +10,7 @@ * published by the Free Software Foundation. 
*/ +#include <plat/clock.h> #include <plat/regs-watchdog.h> #include <mach/map.h> @@ -19,17 +20,12 @@ static inline void arch_wdt_reset(void) { - struct clk *wdtclk; - printk("arch_reset: attempting watchdog reset\n"); __raw_writel(0, S3C2410_WTCON); /* disable watchdog, to be safe */ - wdtclk = clk_get(NULL, "watchdog"); - if (!IS_ERR(wdtclk)) { - clk_enable(wdtclk); - } else - printk(KERN_WARNING "%s: warning: cannot get watchdog clock\n", __func__); + if (s3c2410_wdtclk) + clk_enable(s3c2410_wdtclk); /* put initial values into count and data */ __raw_writel(0x80, S3C2410_WTCNT); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 177cdaf8356..b122adc8bdb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -24,6 +24,7 @@ config MIPS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select HAVE_ARCH_JUMP_LABEL + select IRQ_FORCED_THREADING menu "Machine selection" @@ -722,6 +723,7 @@ config CAVIUM_OCTEON_SIMULATOR select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_HOTPLUG_CPU select SYS_HAS_CPU_CAVIUM_OCTEON + select HOLES_IN_ZONE help The Octeon simulator is software performance model of the Cavium Octeon Processor. It supports simulating Octeon processors on x86 @@ -744,6 +746,7 @@ config CAVIUM_OCTEON_REFERENCE_BOARD select ZONE_DMA32 select USB_ARCH_HAS_OHCI select USB_ARCH_HAS_EHCI + select HOLES_IN_ZONE help This option supports all of the Octeon reference boards from Cavium Networks. It builds a kernel that dynamically determines the Octeon @@ -973,6 +976,9 @@ config ISA_DMA_API config GENERIC_GPIO bool +config HOLES_IN_ZONE + bool + # # Endianess selection. Sufficiently obscure so many users don't know what to # answer,so we try hard to limit the available choices. 
Also the use of a diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c index 3b2c18b1434..f72c48d4804 100644 --- a/arch/mips/alchemy/common/platform.c +++ b/arch/mips/alchemy/common/platform.c @@ -492,7 +492,7 @@ static void __init alchemy_setup_macs(int ctype) memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6); ret = platform_device_register(&au1xxx_eth0_device); - if (!ret) + if (ret) printk(KERN_INFO "Alchemy: failed to register MAC0\n"); diff --git a/arch/mips/alchemy/common/power.c b/arch/mips/alchemy/common/power.c index 647e518c90b..b86324a4260 100644 --- a/arch/mips/alchemy/common/power.c +++ b/arch/mips/alchemy/common/power.c @@ -158,15 +158,21 @@ static void restore_core_regs(void) void au_sleep(void) { - int cpuid = alchemy_get_cputype(); - if (cpuid != ALCHEMY_CPU_UNKNOWN) { - save_core_regs(); - if (cpuid <= ALCHEMY_CPU_AU1500) - alchemy_sleep_au1000(); - else if (cpuid <= ALCHEMY_CPU_AU1200) - alchemy_sleep_au1550(); - restore_core_regs(); + save_core_regs(); + + switch (alchemy_get_cputype()) { + case ALCHEMY_CPU_AU1000: + case ALCHEMY_CPU_AU1500: + case ALCHEMY_CPU_AU1100: + alchemy_sleep_au1000(); + break; + case ALCHEMY_CPU_AU1550: + case ALCHEMY_CPU_AU1200: + alchemy_sleep_au1550(); + break; } + + restore_core_regs(); } #endif /* CONFIG_PM */ diff --git a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c index 596ad00e7f0..463d2c4d944 100644 --- a/arch/mips/alchemy/devboards/bcsr.c +++ b/arch/mips/alchemy/devboards/bcsr.c @@ -89,8 +89,12 @@ static void bcsr_csc_handler(unsigned int irq, struct irq_desc *d) { unsigned short bisr = __raw_readw(bcsr_virt + BCSR_REG_INTSTAT); + disable_irq_nosync(irq); + for ( ; bisr; bisr &= bisr - 1) generic_handle_irq(bcsr_csc_base + __ffs(bisr)); + + enable_irq(irq); } /* NOTE: both the enable and mask bits must be cleared, otherwise the diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c index 
1dac4f27d33..4a8980027ec 100644 --- a/arch/mips/alchemy/devboards/db1200/setup.c +++ b/arch/mips/alchemy/devboards/db1200/setup.c @@ -23,13 +23,6 @@ void __init board_setup(void) unsigned long freq0, clksrc, div, pfc; unsigned short whoami; - /* Set Config[OD] (disable overlapping bus transaction): - * This gets rid of a _lot_ of spurious interrupts (especially - * wrt. IDE); but incurs ~10% performance hit in some - * cpu-bound applications. - */ - set_c0_config(1 << 19); - bcsr_init(DB1200_BCSR_PHYS_ADDR, DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS); diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c index 03db3daadbd..88c4babfdb5 100644 --- a/arch/mips/ar7/irq.c +++ b/arch/mips/ar7/irq.c @@ -98,7 +98,8 @@ static struct irq_chip ar7_sec_irq_type = { static struct irqaction ar7_cascade_action = { .handler = no_action, - .name = "AR7 cascade interrupt" + .name = "AR7 cascade interrupt", + .flags = IRQF_NO_THREAD, }; static void __init ar7_irq_init(int base) diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c index cea6021cb8d..162e11b4ed7 100644 --- a/arch/mips/bcm63xx/irq.c +++ b/arch/mips/bcm63xx/irq.c @@ -222,6 +222,7 @@ static struct irq_chip bcm63xx_external_irq_chip = { static struct irqaction cpu_ip2_cascade_action = { .handler = no_action, .name = "cascade_ip2", + .flags = IRQF_NO_THREAD, }; void __init arch_init_irq(void) diff --git a/arch/mips/cobalt/irq.c b/arch/mips/cobalt/irq.c index cb9bf820fe5..965c777d356 100644 --- a/arch/mips/cobalt/irq.c +++ b/arch/mips/cobalt/irq.c @@ -48,6 +48,7 @@ asmlinkage void plat_irq_dispatch(void) static struct irqaction cascade = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; void __init arch_init_irq(void) diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index fa45e924be0..f7b7ba6d5c4 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -101,20 +101,24 @@ int cpu_fpu_mask = DEC_CPU_IRQ_MASK(DEC_CPU_INR_FPU); static struct irqaction ioirq = { .handler = 
no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; static struct irqaction fpuirq = { .handler = no_action, .name = "fpu", + .flags = IRQF_NO_THREAD, }; static struct irqaction busirq = { .flags = IRQF_DISABLED, .name = "bus error", + .flags = IRQF_NO_THREAD, }; static struct irqaction haltirq = { .handler = dec_intr_halt, .name = "halt", + .flags = IRQF_NO_THREAD, }; diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c index 3dbd7a5a6ad..7798887a128 100644 --- a/arch/mips/emma/markeins/irq.c +++ b/arch/mips/emma/markeins/irq.c @@ -169,7 +169,7 @@ void emma2rh_gpio_irq_init(void) static struct irqaction irq_cascade = { .handler = no_action, - .flags = 0, + .flags = IRQF_NO_THREAD, .name = "cascade", .dev_id = NULL, .next = NULL, diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 0d5a42b5f47..a58addb98cf 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -54,7 +54,6 @@ #define cpu_has_mips_r2_exec_hazard 0 #define cpu_has_dsp 0 #define cpu_has_mipsmt 0 -#define cpu_has_userlocal 0 #define cpu_has_vint 0 #define cpu_has_veic 0 #define cpu_hwrena_impl_bits 0xc0000000 diff --git a/arch/mips/include/asm/mach-powertv/dma-coherence.h b/arch/mips/include/asm/mach-powertv/dma-coherence.h index 62c09408594..35371641575 100644 --- a/arch/mips/include/asm/mach-powertv/dma-coherence.h +++ b/arch/mips/include/asm/mach-powertv/dma-coherence.h @@ -13,7 +13,6 @@ #define __ASM_MACH_POWERTV_DMA_COHERENCE_H #include <linux/sched.h> -#include <linux/version.h> #include <linux/device.h> #include <asm/mach-powertv/asic.h> diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index b4ba2449444..cb41af5f340 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -195,9 +195,9 @@ * to cover the 
pipeline delay. */ .set mips32 - mfc0 v1, CP0_TCSTATUS + mfc0 k0, CP0_TCSTATUS .set mips0 - LONG_S v1, PT_TCSTATUS(sp) + LONG_S k0, PT_TCSTATUS(sp) #endif /* CONFIG_MIPS_MT_SMTC */ LONG_S $4, PT_R4(sp) LONG_S $5, PT_R5(sp) diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index 73031f7fc82..4397972949f 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -18,7 +18,7 @@ #include <linux/init.h> #include <linux/spinlock.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <linux/gpio.h> #include <linux/delay.h> @@ -86,7 +86,6 @@ struct jz_gpio_chip { spinlock_t lock; struct gpio_chip gpio_chip; - struct sys_device sysdev; }; static struct jz_gpio_chip jz4740_gpio_chips[]; @@ -459,49 +458,47 @@ static struct jz_gpio_chip jz4740_gpio_chips[] = { JZ4740_GPIO_CHIP(D), }; -static inline struct jz_gpio_chip *sysdev_to_chip(struct sys_device *dev) +static void jz4740_gpio_suspend_chip(struct jz_gpio_chip *chip) { - return container_of(dev, struct jz_gpio_chip, sysdev); + chip->suspend_mask = readl(chip->base + JZ_REG_GPIO_MASK); + writel(~(chip->wakeup), chip->base + JZ_REG_GPIO_MASK_SET); + writel(chip->wakeup, chip->base + JZ_REG_GPIO_MASK_CLEAR); } -static int jz4740_gpio_suspend(struct sys_device *dev, pm_message_t state) +static int jz4740_gpio_suspend(void) { - struct jz_gpio_chip *chip = sysdev_to_chip(dev); + int i; - chip->suspend_mask = readl(chip->base + JZ_REG_GPIO_MASK); - writel(~(chip->wakeup), chip->base + JZ_REG_GPIO_MASK_SET); - writel(chip->wakeup, chip->base + JZ_REG_GPIO_MASK_CLEAR); + for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); i++) + jz4740_gpio_suspend_chip(&jz4740_gpio_chips[i]); return 0; } -static int jz4740_gpio_resume(struct sys_device *dev) +static void jz4740_gpio_resume_chip(struct jz_gpio_chip *chip) { - struct jz_gpio_chip *chip = sysdev_to_chip(dev); uint32_t mask = chip->suspend_mask; writel(~mask, chip->base + JZ_REG_GPIO_MASK_CLEAR); writel(mask, chip->base + 
JZ_REG_GPIO_MASK_SET); +} - return 0; +static void jz4740_gpio_resume(void) +{ + int i; + + for (i = ARRAY_SIZE(jz4740_gpio_chips) - 1; i >= 0 ; i--) + jz4740_gpio_resume_chip(&jz4740_gpio_chips[i]); } -static struct sysdev_class jz4740_gpio_sysdev_class = { - .name = "gpio", +static struct syscore_ops jz4740_gpio_syscore_ops = { .suspend = jz4740_gpio_suspend, .resume = jz4740_gpio_resume, }; -static int jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) +static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) { - int ret, irq; - - chip->sysdev.id = id; - chip->sysdev.cls = &jz4740_gpio_sysdev_class; - ret = sysdev_register(&chip->sysdev); - - if (ret) - return ret; + int irq; spin_lock_init(&chip->lock); @@ -519,22 +516,17 @@ static int jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) irq_set_chip_and_handler(irq, &jz_gpio_irq_chip, handle_level_irq); } - - return 0; } static int __init jz4740_gpio_init(void) { unsigned int i; - int ret; - - ret = sysdev_class_register(&jz4740_gpio_sysdev_class); - if (ret) - return ret; for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i) jz4740_gpio_chip_init(&jz4740_gpio_chips[i], i); + register_syscore_ops(&jz4740_gpio_syscore_ops); + printk(KERN_INFO "JZ4740 GPIO initialized\n"); return 0; diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index feb8021a305..6a2d758dd8e 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -19,6 +19,26 @@ #include <asm-generic/sections.h> +#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) +#define MCOUNT_OFFSET_INSNS 5 +#else +#define MCOUNT_OFFSET_INSNS 4 +#endif + +/* + * Check if the address is in kernel space + * + * Clone core_kernel_text() from kernel/extable.c, but doesn't call + * init_kernel_text() for Ftrace doesn't trace functions in init sections. 
+ */ +static inline int in_kernel_space(unsigned long ip) +{ + if (ip >= (unsigned long)_stext && + ip <= (unsigned long)_etext) + return 1; + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ @@ -54,20 +74,6 @@ static inline void ftrace_dyn_arch_init_insns(void) #endif } -/* - * Check if the address is in kernel space - * - * Clone core_kernel_text() from kernel/extable.c, but doesn't call - * init_kernel_text() for Ftrace doesn't trace functions in init sections. - */ -static inline int in_kernel_space(unsigned long ip) -{ - if (ip >= (unsigned long)_stext && - ip <= (unsigned long)_etext) - return 1; - return 0; -} - static int ftrace_modify_code(unsigned long ip, unsigned int new_code) { int faulted; @@ -112,11 +118,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code) * 1: offset = 4 instructions */ -#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) -#define MCOUNT_OFFSET_INSNS 5 -#else -#define MCOUNT_OFFSET_INSNS 4 -#endif #define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS) int ftrace_make_nop(struct module *mod, diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c index 5c74eb797f0..32b397b646e 100644 --- a/arch/mips/kernel/i8259.c +++ b/arch/mips/kernel/i8259.c @@ -229,7 +229,7 @@ static void i8259A_shutdown(void) */ if (i8259A_auto_eoi >= 0) { outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } } @@ -295,6 +295,7 @@ static void init_8259A(int auto_eoi) static struct irqaction irq2 = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; static struct resource pic1_io_resource = { diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 876a75cc376..922a554cd10 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -349,3 +349,10 @@ SYSCALL_DEFINE6(32_fanotify_mark, int, 
fanotify_fd, unsigned int, flags, return sys_fanotify_mark(fanotify_fd, flags, merge_64(a3, a4), dfd, pathname); } + +SYSCALL_DEFINE6(32_futex, u32 __user *, uaddr, int, op, u32, val, + struct compat_timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) +{ + return compat_sys_futex(uaddr, op, val, utime, uaddr2, val3); +} diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index f9296e894e4..6de1f598346 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -315,7 +315,7 @@ EXPORT(sysn32_call_table) PTR sys_fremovexattr PTR sys_tkill PTR sys_ni_syscall - PTR compat_sys_futex + PTR sys_32_futex PTR compat_sys_sched_setaffinity /* 6195 */ PTR compat_sys_sched_getaffinity PTR sys_cacheflush diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 4d7c9827706..1d813169e45 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -441,7 +441,7 @@ sys_call_table: PTR sys_fremovexattr /* 4235 */ PTR sys_tkill PTR sys_sendfile64 - PTR compat_sys_futex + PTR sys_32_futex PTR compat_sys_sched_setaffinity PTR compat_sys_sched_getaffinity /* 4240 */ PTR compat_sys_io_setup diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index dbbe0ce48d8..f8524003676 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -8,6 +8,7 @@ * Copyright (C) 1999, 2000 Silicon Graphics, Inc. 
*/ #include <linux/cache.h> +#include <linux/irqflags.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/personality.h> @@ -658,6 +659,8 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + local_irq_enable(); + /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b7517e3abc8..cbea618af0b 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -14,6 +14,7 @@ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> @@ -364,21 +365,26 @@ static int regs_to_trapnr(struct pt_regs *regs) return (regs->cp0_cause >> 2) & 0x1f; } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); void __noreturn die(const char *str, struct pt_regs *regs) { static int die_counter; int sig = SIGSEGV; #ifdef CONFIG_MIPS_MT_SMTC - unsigned long dvpret = dvpe(); + unsigned long dvpret; #endif /* CONFIG_MIPS_MT_SMTC */ + oops_enter(); + if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) sig = 0; console_verbose(); - spin_lock_irq(&die_lock); + raw_spin_lock_irq(&die_lock); +#ifdef CONFIG_MIPS_MT_SMTC + dvpret = dvpe(); +#endif /* CONFIG_MIPS_MT_SMTC */ bust_spinlocks(1); #ifdef CONFIG_MIPS_MT_SMTC mips_mt_regdump(dvpret); @@ -387,7 +393,9 @@ void __noreturn die(const char *str, struct pt_regs *regs) printk("%s[#%d]:\n", str, ++die_counter); show_registers(regs); add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); + raw_spin_unlock_irq(&die_lock); + + oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 2cd50ad0d5c..3efcb065f78 100644 --- a/arch/mips/kernel/vpe.c +++ 
b/arch/mips/kernel/vpe.c @@ -192,7 +192,7 @@ static struct tc *get_tc(int index) } spin_unlock(&vpecontrol.tc_list_lock); - return NULL; + return res; } /* allocate a vpe and associate it with this minor (or index) */ diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index fc89795cafd..f9737bb3c5a 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -123,11 +123,10 @@ void ltq_enable_irq(struct irq_data *d) static unsigned int ltq_startup_eiu_irq(struct irq_data *d) { int i; - int irq_nr = d->irq - INT_NUM_IRQ0; ltq_enable_irq(d); for (i = 0; i < MAX_EIU; i++) { - if (irq_nr == ltq_eiu_irq[i]) { + if (d->irq == ltq_eiu_irq[i]) { /* low level - we should really handle set_type */ ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) | (0x6 << (i * 4)), LTQ_EIU_EXIN_C); @@ -147,11 +146,10 @@ static unsigned int ltq_startup_eiu_irq(struct irq_data *d) static void ltq_shutdown_eiu_irq(struct irq_data *d) { int i; - int irq_nr = d->irq - INT_NUM_IRQ0; ltq_disable_irq(d); for (i = 0; i < MAX_EIU; i++) { - if (irq_nr == ltq_eiu_irq[i]) { + if (d->irq == ltq_eiu_irq[i]) { /* disable */ ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i), LTQ_EIU_EXIN_INEN); diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c index 66eb52fa50a..033b3184c7a 100644 --- a/arch/mips/lantiq/xway/ebu.c +++ b/arch/mips/lantiq/xway/ebu.c @@ -10,7 +10,6 @@ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/version.h> #include <linux/ioport.h> #include <lantiq_soc.h> diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c index 9d69f01e352..39f0d2641cb 100644 --- a/arch/mips/lantiq/xway/pmu.c +++ b/arch/mips/lantiq/xway/pmu.c @@ -8,7 +8,6 @@ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/version.h> #include <linux/ioport.h> #include <lantiq_soc.h> diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c index de4c165515d..d608b6ef0ed 100644 --- a/arch/mips/lasat/interrupt.c +++ 
b/arch/mips/lasat/interrupt.c @@ -105,6 +105,7 @@ asmlinkage void plat_irq_dispatch(void) static struct irqaction cascade = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; void __init arch_init_irq(void) diff --git a/arch/mips/loongson/fuloong-2e/irq.c b/arch/mips/loongson/fuloong-2e/irq.c index d61a04222b8..3cf1fef29f0 100644 --- a/arch/mips/loongson/fuloong-2e/irq.c +++ b/arch/mips/loongson/fuloong-2e/irq.c @@ -42,6 +42,7 @@ asmlinkage void mach_irq_dispatch(unsigned int pending) static struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; void __init mach_init_irq(void) diff --git a/arch/mips/loongson/lemote-2f/irq.c b/arch/mips/loongson/lemote-2f/irq.c index 081db102bb9..14b081841b6 100644 --- a/arch/mips/loongson/lemote-2f/irq.c +++ b/arch/mips/loongson/lemote-2f/irq.c @@ -96,12 +96,13 @@ static irqreturn_t ip6_action(int cpl, void *dev_id) struct irqaction ip6_irqaction = { .handler = ip6_action, .name = "cascade", - .flags = IRQF_SHARED, + .flags = IRQF_SHARED | IRQF_NO_THREAD, }; struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; void __init mach_init_irq(void) diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 9ff5d0fac55..302d779d5b0 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -6,6 +6,7 @@ * Copyright (C) 2011 Wind River Systems, * written by Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/compiler.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/mman.h> @@ -15,12 +16,11 @@ #include <linux/sched.h> unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ - EXPORT_SYMBOL(shm_align_mask); /* gap between mmap and stack */ #define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((TASK_SIZE)/6*5) +#define MAX_GAP ((TASK_SIZE)/6*5) static int mmap_is_legacy(void) { @@ -57,13 +57,13 @@ static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, return base - off; } 
-#define COLOUR_ALIGN(addr,pgoff) \ +#define COLOUR_ALIGN(addr, pgoff) \ ((((addr) + shm_align_mask) & ~shm_align_mask) + \ (((pgoff) << PAGE_SHIFT) & shm_align_mask)) enum mmap_allocation_direction {UP, DOWN}; -static unsigned long arch_get_unmapped_area_foo(struct file *filp, +static unsigned long arch_get_unmapped_area_common(struct file *filp, unsigned long addr0, unsigned long len, unsigned long pgoff, unsigned long flags, enum mmap_allocation_direction dir) { @@ -103,16 +103,16 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vma->vm_start)) return addr; } if (dir == UP) { addr = mm->mmap_base; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ @@ -131,28 +131,30 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp, mm->free_area_cache = mm->mmap_base; } - /* either no address requested or can't fit in requested address hole */ + /* + * either no address requested, or the mapping can't fit into + * the requested address hole + */ addr = mm->free_area_cache; - if (do_color_align) { - unsigned long base = - COLOUR_ALIGN_DOWN(addr - len, pgoff); - + if (do_color_align) { + unsigned long base = + COLOUR_ALIGN_DOWN(addr - len, pgoff); addr = base + len; - } + } /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { vma = find_vma(mm, addr - len); if (!vma || addr <= vma->vm_start) { - /* remember the address as a hint for next time */ - return mm->free_area_cache = addr-len; + /* cache the address as a hint for next time */ + return mm->free_area_cache = addr - len; } } if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = mm->mmap_base-len; + addr = mm->mmap_base - len; if (do_color_align) addr = COLOUR_ALIGN_DOWN(addr, pgoff); @@ -163,8 +165,8 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { - /* remember the address as a hint for next time */ + if (likely(!vma || addr + len <= vma->vm_start)) { + /* cache the address as a hint for next time */ return mm->free_area_cache = addr; } @@ -173,7 +175,7 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; + addr = vma->vm_start - len; if (do_color_align) addr = COLOUR_ALIGN_DOWN(addr, pgoff); } while (likely(len < vma->vm_start)); @@ -201,7 +203,7 @@ bottomup: unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, unsigned long len, unsigned long pgoff, unsigned long flags) { - return arch_get_unmapped_area_foo(filp, + return 
arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, UP); } @@ -213,7 +215,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0, unsigned long len, unsigned long pgoff, unsigned long flags) { - return arch_get_unmapped_area_foo(filp, + return arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, DOWN); } diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index b6e1cff5066..e06370f58ef 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1759,14 +1759,13 @@ static void __cpuinit build_r3000_tlb_modify_handler(void) u32 *p = handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - struct work_registers wr; memset(handle_tlbm, 0, sizeof(handle_tlbm)); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); + build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); uasm_i_nop(&p); /* load delay */ build_make_write(&p, &r, K0, K1); build_r3000_pte_reload_tlbwi(&p, K0, K1); @@ -1963,7 +1962,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void) uasm_i_andi(&p, wr.r3, wr.r3, 2); uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); } - + if (PM_DEFAULT_MASK == 0) + uasm_i_nop(&p); /* * We clobbered C0_PAGEMASK, restore it. On the other branch * it is restored in build_huge_tlb_write_entry. 
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 1d36c511a7a..d53ff91b277 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -350,12 +350,14 @@ unsigned int plat_ipi_resched_int_xlate(unsigned int cpu) static struct irqaction i8259irq = { .handler = no_action, - .name = "XT-PIC cascade" + .name = "XT-PIC cascade", + .flags = IRQF_NO_THREAD, }; static struct irqaction corehi_irqaction = { .handler = no_action, - .name = "CoreHi" + .name = "CoreHi", + .flags = IRQF_NO_THREAD, }; static msc_irqmap_t __initdata msc_irqmap[] = { diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile index 9bd3f731f62..2dca585dd2f 100644 --- a/arch/mips/netlogic/xlr/Makefile +++ b/arch/mips/netlogic/xlr/Makefile @@ -2,4 +2,4 @@ obj-y += setup.o platform.o irq.o setup.o time.o obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_EARLY_PRINTK) += xlr_console.o -EXTRA_CFLAGS += -Werror +ccflags-y += -Werror diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 603d7493e96..8656388b34b 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -171,8 +171,13 @@ static int __devinit ltq_pci_startup(struct ltq_pci_data *conf) u32 temp_buffer; /* set clock to 33Mhz */ - ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR); - ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR); + if (ltq_is_ar9()) { + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0x1f00000, LTQ_CGU_IFCCR); + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0xe00000, LTQ_CGU_IFCCR); + } else { + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR); + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR); + } /* external or internal clock ? 
*/ if (conf->clock) { diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c index 764362ce5e4..5f3a69cebad 100644 --- a/arch/mips/pci/pci-rc32434.c +++ b/arch/mips/pci/pci-rc32434.c @@ -215,7 +215,7 @@ static int __init rc32434_pci_init(void) rc32434_pcibridge_init(); io_map_base = ioremap(rc32434_res_pci_io1.start, - resource_size(&rcrc32434_res_pci_io1)); + resource_size(&rc32434_res_pci_io1)); if (!io_map_base) return -ENOMEM; diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq.c b/arch/mips/pmc-sierra/msp71xx/msp_irq.c index 4531c4a514b..d3c3d81757a 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_irq.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_irq.c @@ -108,12 +108,14 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) static struct irqaction cic_cascade_msp = { .handler = no_action, - .name = "MSP CIC cascade" + .name = "MSP CIC cascade", + .flags = IRQF_NO_THREAD, }; static struct irqaction per_cascade_msp = { .handler = no_action, - .name = "MSP PER cascade" + .name = "MSP PER cascade", + .flags = IRQF_NO_THREAD, }; void __init arch_init_irq(void) diff --git a/arch/mips/pnx8550/common/int.c b/arch/mips/pnx8550/common/int.c index 6b93c81779c..1ebe22bdadc 100644 --- a/arch/mips/pnx8550/common/int.c +++ b/arch/mips/pnx8550/common/int.c @@ -167,7 +167,7 @@ static struct irq_chip level_irq_type = { static struct irqaction gic_action = { .handler = no_action, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "GIC", }; diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index b4d08e4d2ea..f72c336ea27 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -155,32 +155,32 @@ static void __irq_entry indy_buserror_irq(void) static struct irqaction local0_cascade = { .handler = no_action, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "local0 cascade", }; static struct irqaction local1_cascade = { .handler = no_action, - .flags = IRQF_DISABLED, + 
.flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "local1 cascade", }; static struct irqaction buserr = { .handler = no_action, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "Bus Error", }; static struct irqaction map0_cascade = { .handler = no_action, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "mapable0 cascade", }; #ifdef USE_LIO3_IRQ static struct irqaction map1_cascade = { .handler = no_action, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NO_THREAD, .name = "mapable1 cascade", }; #define SGI_INTERRUPTS SGINT_END diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c index a7e5a6d917b..3ab5b5d25b0 100644 --- a/arch/mips/sni/rm200.c +++ b/arch/mips/sni/rm200.c @@ -359,6 +359,7 @@ void sni_rm200_init_8259A(void) static struct irqaction sni_rm200_irq2 = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; static struct resource sni_rm200_pic1_resource = { diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c index 70a3b85f375..fad2bef432c 100644 --- a/arch/mips/vr41xx/common/irq.c +++ b/arch/mips/vr41xx/common/irq.c @@ -34,6 +34,7 @@ static irq_cascade_t irq_cascade[NR_IRQS] __cacheline_aligned; static struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int)) diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 5cc83851ad0..31a7d3a7ce2 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -561,6 +561,20 @@ static struct pci_ops u4_pcie_pci_ops = .write = u4_pcie_write_config, }; +static void __devinit pmac_pci_fixup_u4_of_node(struct pci_dev *dev) +{ + /* Apple's device-tree "hides" the root complex virtual P2P bridge + * on U4. However, Linux sees it, causing the PCI <-> OF matching + * code to fail to properly match devices below it. 
This works around + * it by setting the node of the bridge to point to the PHB node, + * which is not entirely correct but fixes the matching code and + * doesn't break anything else. It's also the simplest possible fix. + */ + if (dev->dev.of_node == NULL) + dev->dev.of_node = pcibios_get_phb_of_node(dev->bus); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node); + #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 64b61bf72e9..547f1a6a35d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -188,7 +188,8 @@ extern char elf_platform[]; #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ - set_personality(PER_LINUX); \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_31BIT); \ else \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 519eb5f187e..c0cb794bb36 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -658,12 +658,14 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) * struct gmap_struct - guest address space * @mm: pointer to the parent mm_struct * @table: pointer to the page directory + * @asce: address space control element for gmap page table * @crst_list: list of all crst tables used in the guest address space */ struct gmap { struct list_head list; struct mm_struct *mm; unsigned long *table; + unsigned long asce; struct list_head crst_list; }; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 532fd432215..2b45591e158 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <asm/vdso.h> #include <asm/sigp.h> +#include <asm/pgtable.h> /* * Make sure that the compiler is new enough. 
We want a compiler that @@ -126,6 +127,7 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); @@ -151,6 +153,7 @@ int main(void) DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5f729d627ce..713da076053 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1076,6 +1076,11 @@ sie_loop: lg %r14,__LC_THREAD_INFO # pointer thread_info struct tm __TI_flags+7(%r14),_TIF_EXIT_SIE jnz sie_exit + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer SPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) @@ -1083,6 +1088,7 @@ sie_done: SPP __LC_CMF_HPP # set host id lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f17296e4fc8..dc2b580e27b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -123,6 +123,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: r = 1; 
break; default: @@ -263,10 +264,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); + gmap_enable(vcpu->arch.gmap); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); restore_fp_regs(&vcpu->arch.host_fpregs); @@ -461,7 +464,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_disable(); kvm_guest_enter(); local_irq_enable(); - gmap_enable(vcpu->arch.gmap); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { @@ -470,7 +472,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); - gmap_disable(vcpu->arch.gmap); local_irq_disable(); kvm_guest_exit(); local_irq_enable(); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4d1f2bce87b..5d56c2b95b1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -160,6 +160,8 @@ struct gmap *gmap_alloc(struct mm_struct *mm) table = (unsigned long *) page_to_phys(page); crst_table_init(table, _REGION1_ENTRY_EMPTY); gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); list_add(&gmap->list, &mm->context.gmap_list); return gmap; @@ -240,10 +242,6 @@ EXPORT_SYMBOL_GPL(gmap_free); */ void gmap_enable(struct gmap *gmap) { - /* Load primary space page table origin. 
*/ - S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(gmap->table); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = (unsigned long) gmap; } EXPORT_SYMBOL_GPL(gmap_enable); @@ -254,10 +252,6 @@ EXPORT_SYMBOL_GPL(gmap_enable); */ void gmap_disable(struct gmap *gmap) { - /* Load primary space page table origin. */ - S390_lowcore.user_asce = - gmap->mm->context.asce_bits | __pa(gmap->mm->pgd); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = 0UL; } EXPORT_SYMBOL_GPL(gmap_disable); @@ -309,15 +303,15 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); @@ -325,6 +319,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV; } +out: up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 1407c07bdad..f6ae2b2b687 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -280,7 +280,7 @@ static inline unsigned long srmmu_hwprobe(unsigned long vaddr) return retval; } #else -#define srmmu_hwprobe(addr) (srmmu_swprobe(addr, 0) & SRMMU_PTE_PMASK) +#define srmmu_hwprobe(addr) srmmu_swprobe(addr, 0) #endif static inline int diff --git 
a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 55a17c6efeb..d06a2660175 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -43,6 +43,8 @@ #define SUN4V_CHIP_NIAGARA1 0x01 #define SUN4V_CHIP_NIAGARA2 0x02 #define SUN4V_CHIP_NIAGARA3 0x03 +#define SUN4V_CHIP_NIAGARA4 0x04 +#define SUN4V_CHIP_NIAGARA5 0x05 #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index 9ed6ff679ab..ee8edc68423 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -66,6 +66,8 @@ static struct xor_block_template xor_block_niagara = { ((tlb_type == hypervisor && \ (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) ? \ &xor_block_niagara : \ &xor_block_VIS) diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 9810fd88105..ba9b1cec4e6 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -481,6 +481,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara3"; break; + case SUN4V_CHIP_NIAGARA4: + sparc_cpu_type = "UltraSparc T4 (Niagara4)"; + sparc_fpu_type = "UltraSparc T4 integrated FPU"; + sparc_pmu_type = "niagara4"; + break; + + case SUN4V_CHIP_NIAGARA5: + sparc_cpu_type = "UltraSparc T5 (Niagara5)"; + sparc_fpu_type = "UltraSparc T5 integrated FPU"; + sparc_pmu_type = "niagara5"; + break; + default: printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", prom_cpu_compatible); diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 4197e8d62d4..9323eafccb9 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -325,6 +325,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case 
SUN4V_CHIP_NIAGARA1: case SUN4V_CHIP_NIAGARA2: case SUN4V_CHIP_NIAGARA3: + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: rover_inc_table = niagara_iterate_method; break; default: diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 0eac1b2fc53..0d810c2f1d0 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -133,7 +133,7 @@ prom_sun4v_name: prom_niagara_prefix: .asciz "SUNW,UltraSPARC-T" prom_sparc_prefix: - .asciz "SPARC-T" + .asciz "SPARC-" .align 4 prom_root_compatible: .skip 64 @@ -396,7 +396,7 @@ sun4v_chip_type: or %g1, %lo(prom_cpu_compatible), %g1 sethi %hi(prom_sparc_prefix), %g7 or %g7, %lo(prom_sparc_prefix), %g7 - mov 7, %g3 + mov 6, %g3 90: ldub [%g7], %g2 ldub [%g1], %g4 cmp %g2, %g4 @@ -408,10 +408,23 @@ sun4v_chip_type: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 - ldub [%g1 + 7], %g2 + ldub [%g1 + 6], %g2 + cmp %g2, 'T' + be,pt %xcc, 70f + cmp %g2, 'M' + bne,pn %xcc, 4f + nop + +70: ldub [%g1 + 7], %g2 cmp %g2, '3' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA3, %g4 + cmp %g2, '4' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA4, %g4 + cmp %g2, '5' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA5, %g4 ba,pt %xcc, 4f nop @@ -545,6 +558,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA3 be,pt %xcc, niagara2_patch nop + cmp %g1, SUN4V_CHIP_NIAGARA4 + be,pt %xcc, niagara2_patch + nop + cmp %g1, SUN4V_CHIP_NIAGARA5 + be,pt %xcc, niagara2_patch + nop call generic_patch_copyops nop diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 1e94f946570..8aa0d440858 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -230,7 +230,8 @@ static void pci_parse_of_addrs(struct platform_device *op, res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; } else if (i == dev->rom_base_reg) { res = &dev->resource[PCI_ROM_RESOURCE]; - flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE + | IORESOURCE_SIZEALIGN; } else { 
printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); continue; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c8cc461ff75..f793742eec2 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -380,8 +380,7 @@ void flush_thread(void) #endif } - /* Now, this task is no longer a kernel thread. */ - current->thread.current_ds = USER_DS; + /* This task is no longer a kernel thread. */ if (current->thread.flags & SPARC_FLAG_KTHREAD) { current->thread.flags &= ~SPARC_FLAG_KTHREAD; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c158a95ec66..d959cd0a4aa 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -368,9 +368,6 @@ void flush_thread(void) /* Clear FPU register state. */ t->fpsaved[0] = 0; - - if (get_thread_current_ds() != ASI_AIUS) - set_fs(USER_DS); } /* It's a bit more tricky when 64-bit tasks are involved... */ diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index d26e1f6c717..3e3e2914c70 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -137,7 +137,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Just ignore, this behavior is now the default. */ + prom_early_console.flags &= ~CON_BOOT; break; default: printk("Unknown boot switch (-%c)\n", c); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 3c5bb784214..c965595aa7e 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -106,7 +106,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Just ignore, this behavior is now the default. */ + prom_early_console.flags &= ~CON_BOOT; break; case 'P': /* Force UltraSPARC-III P-Cache on. 
*/ @@ -425,10 +425,14 @@ static void __init init_sparc64_elf_hwcap(void) else if (tlb_type == hypervisor) { if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= HWCAP_SPARC_N2; } @@ -452,11 +456,15 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1) cap |= AV_SPARC_ASI_BLK_INIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | AV_SPARC_POPC); - if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 1ba95aff5d5..2caa556db86 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -273,10 +273,7 @@ void do_sigreturn32(struct pt_regs *regs) case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32); } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); return; segv: @@ -377,10 +374,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) case 1: set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32); } sigdelsetmask(&set, 
~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); return; segv: force_sig(SIGSEGV, current); @@ -782,6 +776,7 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int err; if (ka->sa.sa_flags & SA_SIGINFO) @@ -792,12 +787,10 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka, if (err) return err; - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&current->blocked,signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + sigaddset(&blocked, signr); + set_current_blocked(&blocked); tracehook_signal_handler(signr, info, ka, regs, 0); @@ -881,7 +874,7 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs, */ if (current_thread_info()->status & TS_RESTORE_SIGMASK) { current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + set_current_blocked(&current->saved_sigmask); } } diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 04ede8f04ad..8ce247ac04c 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -62,12 +62,13 @@ struct rt_signal_frame { static int _sigpause_common(old_sigset_t set) { - set &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); + sigset_t blocked; + current->saved_sigmask = current->blocked; - siginitset(&current->blocked, set); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + + set &= _BLOCKABLE; + siginitset(&blocked, set); + set_current_blocked(&blocked); current->state = TASK_INTERRUPTIBLE; schedule(); @@ -139,10 +140,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) goto segv_and_exit; sigdelsetmask(&set, 
~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); return; segv_and_exit: @@ -209,10 +207,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); return; segv: force_sig(SIGSEGV, current); @@ -470,6 +465,7 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int err; if (ka->sa.sa_flags & SA_SIGINFO) @@ -480,12 +476,10 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, if (err) return err; - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + sigaddset(&blocked, signr); + set_current_blocked(&blocked); tracehook_signal_handler(signr, info, ka, regs, 0); @@ -581,7 +575,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + set_current_blocked(&current->saved_sigmask); } } diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 47509df3b89..a2b81598d90 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -70,10 +70,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) goto do_sigsegv; } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); } if 
(test_thread_flag(TIF_32BIT)) { pc &= 0xffffffff; @@ -242,12 +239,13 @@ struct rt_signal_frame { static long _sigpause_common(old_sigset_t set) { - set &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); + sigset_t blocked; + current->saved_sigmask = current->blocked; - siginitset(&current->blocked, set); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + + set &= _BLOCKABLE; + siginitset(&blocked, set); + set_current_blocked(&blocked); current->state = TASK_INTERRUPTIBLE; schedule(); @@ -327,10 +325,7 @@ void do_rt_sigreturn(struct pt_regs *regs) pt_regs_clear_syscall(regs); sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); return; segv: force_sig(SIGSEGV, current); @@ -484,18 +479,17 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int err; err = setup_rt_frame(ka, regs, signr, oldset, (ka->sa.sa_flags & SA_SIGINFO) ? 
info : NULL); if (err) return err; - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&current->blocked,signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + sigaddset(&blocked, signr); + set_current_blocked(&blocked); tracehook_signal_handler(signr, info, ka, regs, 0); @@ -601,7 +595,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) */ if (current_thread_info()->status & TS_RESTORE_SIGMASK) { current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + set_current_blocked(&current->saved_sigmask); } } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 581531dbc8b..8e073d80213 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -511,6 +511,11 @@ static void __init read_obp_translations(void) for (i = 0; i < prom_trans_ents; i++) prom_trans[i].data &= ~0x0003fe0000000000UL; } + + /* Force execute bit on. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data |= (tlb_type == hypervisor ? 
+ _PAGE_EXEC_4V : _PAGE_EXEC_4U); } static void __init hypervisor_tlb_lock(unsigned long vaddr, diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c index e485a680499..13c2169822a 100644 --- a/arch/sparc/mm/leon_mm.c +++ b/arch/sparc/mm/leon_mm.c @@ -162,7 +162,7 @@ ready: printk(KERN_INFO "swprobe: padde %x\n", paddr_calc); if (paddr) *paddr = paddr_calc; - return paddrbase; + return pte; } void leon_flush_icache_all(void) diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index fc94607f0bd..aecc8ed5f39 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -21,7 +21,7 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/irqflags.h> -#include <linux/atomic.h> +#include <asm/atomic_32.h> #include <asm/asm-offsets.h> #include <hv/hypervisor.h> #include <arch/abi.h> diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 1f75a2a5610..30638042691 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -70,7 +70,7 @@ */ #include <linux/linkage.h> -#include <linux/atomic.h> +#include <asm/atomic_32.h> #include <asm/page.h> #include <asm/processor.h> diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86 index d31ecf346b4..21bebe63df6 100644 --- a/arch/um/Kconfig.x86 +++ b/arch/um/Kconfig.x86 @@ -10,6 +10,10 @@ config CMPXCHG_LOCAL bool default n +config CMPXCHG_DOUBLE + bool + default n + source "arch/x86/Kconfig.cpu" endmenu diff --git a/arch/um/Makefile b/arch/um/Makefile index fab8121d2b3..c0f712cc7c5 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -41,7 +41,7 @@ KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH) KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr KBUILD_AFLAGS += $(ARCH_INCLUDE) diff --git 
a/arch/um/drivers/line.c b/arch/um/drivers/line.c index d51c404239a..364c8a15c4c 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -399,8 +399,8 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) * is done under a spinlock. Checking whether the device is in use is * line->tty->count > 1, also under the spinlock. * - * tty->count serves to decide whether the device should be enabled or - * disabled on the host. If it's equal to 1, then we are doing the + * line->count serves to decide whether the device should be enabled or + * disabled on the host. If it's equal to 0, then we are doing the * first open or last close. Otherwise, open and close just return. */ @@ -414,16 +414,16 @@ int line_open(struct line *lines, struct tty_struct *tty) goto out_unlock; err = 0; - if (tty->count > 1) + if (line->count++) goto out_unlock; - spin_unlock(&line->count_lock); - + BUG_ON(tty->driver_data); tty->driver_data = line; line->tty = tty; + spin_unlock(&line->count_lock); err = enable_chan(line); - if (err) + if (err) /* line_close() will be called by our caller */ return err; INIT_DELAYED_WORK(&line->task, line_timer_cb); @@ -436,7 +436,7 @@ int line_open(struct line *lines, struct tty_struct *tty) chan_window_size(&line->chan_list, &tty->winsize.ws_row, &tty->winsize.ws_col); - return err; + return 0; out_unlock: spin_unlock(&line->count_lock); @@ -460,17 +460,16 @@ void line_close(struct tty_struct *tty, struct file * filp) flush_buffer(line); spin_lock(&line->count_lock); - if (!line->valid) - goto out_unlock; + BUG_ON(!line->valid); - if (tty->count > 1) + if (--line->count) goto out_unlock; - spin_unlock(&line->count_lock); - line->tty = NULL; tty->driver_data = NULL; + spin_unlock(&line->count_lock); + if (line->sigio) { unregister_winch(tty); line->sigio = 0; @@ -498,7 +497,7 @@ static int setup_one_line(struct line *lines, int n, char *init, int init_prio, spin_lock(&line->count_lock); - if (line->tty != NULL) { + if 
(line->count) { *error_out = "Device is already open"; goto out; } @@ -722,41 +721,53 @@ struct winch { int pid; struct tty_struct *tty; unsigned long stack; + struct work_struct work; }; -static void free_winch(struct winch *winch, int free_irq_ok) +static void __free_winch(struct work_struct *work) { - if (free_irq_ok) - free_irq(WINCH_IRQ, winch); - - list_del(&winch->list); + struct winch *winch = container_of(work, struct winch, work); + free_irq(WINCH_IRQ, winch); if (winch->pid != -1) os_kill_process(winch->pid, 1); - if (winch->fd != -1) - os_close_file(winch->fd); if (winch->stack != 0) free_stack(winch->stack, 0); kfree(winch); } +static void free_winch(struct winch *winch) +{ + int fd = winch->fd; + winch->fd = -1; + if (fd != -1) + os_close_file(fd); + list_del(&winch->list); + __free_winch(&winch->work); +} + static irqreturn_t winch_interrupt(int irq, void *data) { struct winch *winch = data; struct tty_struct *tty; struct line *line; + int fd = winch->fd; int err; char c; - if (winch->fd != -1) { - err = generic_read(winch->fd, &c, NULL); + if (fd != -1) { + err = generic_read(fd, &c, NULL); if (err < 0) { if (err != -EAGAIN) { + winch->fd = -1; + list_del(&winch->list); + os_close_file(fd); printk(KERN_ERR "winch_interrupt : " "read failed, errno = %d\n", -err); printk(KERN_ERR "fd %d is losing SIGWINCH " "support\n", winch->tty_fd); - free_winch(winch, 0); + INIT_WORK(&winch->work, __free_winch); + schedule_work(&winch->work); return IRQ_HANDLED; } goto out; @@ -828,7 +839,7 @@ static void unregister_winch(struct tty_struct *tty) list_for_each_safe(ele, next, &winch_handlers) { winch = list_entry(ele, struct winch, list); if (winch->tty == tty) { - free_winch(winch, 1); + free_winch(winch); break; } } @@ -844,7 +855,7 @@ static void winch_cleanup(void) list_for_each_safe(ele, next, &winch_handlers) { winch = list_entry(ele, struct winch, list); - free_winch(winch, 1); + free_winch(winch); } spin_unlock(&winch_handler_lock); diff --git 
a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index 8ac7146c237..2e1de572860 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -123,6 +123,7 @@ static int xterm_open(int input, int output, int primary, void *d, err = -errno; printk(UM_KERN_ERR "xterm_open : unlink failed, errno = %d\n", errno); + close(fd); return err; } close(fd); diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index ae084ad1a3a..1a7d2757fe0 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -42,10 +42,6 @@ extern long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern unsigned long getreg(struct task_struct *child, int regno); extern int putreg(struct task_struct *child, int regno, unsigned long value); -extern int get_fpregs(struct user_i387_struct __user *buf, - struct task_struct *child); -extern int set_fpregs(struct user_i387_struct __user *buf, - struct task_struct *child); extern int arch_copy_tls(struct task_struct *new); extern void clear_flushed_tls(struct task_struct *task); diff --git a/arch/um/include/shared/line.h b/arch/um/include/shared/line.h index 72f4f25af24..63df3ca02ac 100644 --- a/arch/um/include/shared/line.h +++ b/arch/um/include/shared/line.h @@ -33,6 +33,7 @@ struct line_driver { struct line { struct tty_struct *tty; spinlock_t count_lock; + unsigned long count; int valid; char *init_str; diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index b0b4589e0eb..f1e0aa56c52 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -16,7 +16,7 @@ extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int save_registers(int pid, struct uml_pt_regs *regs); extern int restore_registers(int pid, struct uml_pt_regs *regs); extern int init_registers(int pid); -extern void get_safe_registers(unsigned long *regs); +extern void 
get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern unsigned long get_thread_reg(int reg, jmp_buf *buf); extern int get_fp_registers(int pid, unsigned long *regs); extern int put_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index fab4371184f..21c1ae7c3d7 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -202,7 +202,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, arch_copy_thread(&current->thread.arch, &p->thread.arch); } else { - get_safe_registers(p->thread.regs.regs.gp); + get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; } diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 701b672c112..c9da32b0c70 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -50,23 +50,11 @@ long arch_ptrace(struct task_struct *child, long request, void __user *vp = p; switch (request) { - /* read word at location addr. */ - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: ret = peek_user(child, addr, data); break; - /* write the word at location addr. */ - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: ret = poke_user(child, addr, data); @@ -107,16 +95,6 @@ long arch_ptrace(struct task_struct *child, long request, break; } #endif -#ifdef PTRACE_GETFPREGS - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(vp, child); - break; -#endif -#ifdef PTRACE_SETFPREGS - case PTRACE_SETFPREGS: /* Set the child FPU state. 
*/ - ret = set_fpregs(vp, child); - break; -#endif case PTRACE_GET_THREAD_AREA: ret = ptrace_get_thread_area(child, addr, vp); break; @@ -154,12 +132,6 @@ long arch_ptrace(struct task_struct *child, long request, break; } #endif -#ifdef PTRACE_ARCH_PRCTL - case PTRACE_ARCH_PRCTL: - /* XXX Calls ptrace on the host - needs some SMP thinking */ - ret = arch_prctl(child, data, (void __user *) addr); - break; -#endif default: ret = ptrace_request(child, request, addr, data); if (ret == -EIO) diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index 830fe6a1518..b866b9e3bef 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -8,6 +8,8 @@ #include <string.h> #include <sys/ptrace.h> #include "sysdep/ptrace.h" +#include "sysdep/ptrace_user.h" +#include "registers.h" int save_registers(int pid, struct uml_pt_regs *regs) { @@ -32,6 +34,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs) /* This is set once at boot time and not changed thereafter */ static unsigned long exec_regs[MAX_REG_NR]; +static unsigned long exec_fp_regs[FP_SIZE]; int init_registers(int pid) { @@ -42,10 +45,14 @@ int init_registers(int pid) return -errno; arch_init_registers(pid); + get_fp_registers(pid, exec_fp_regs); return 0; } -void get_safe_registers(unsigned long *regs) +void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) { memcpy(regs, exec_regs, sizeof(exec_regs)); + + if (fp_regs) + memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); } diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index d261f170d12..e771398be5f 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -39,7 +39,7 @@ static unsigned long syscall_regs[MAX_REG_NR]; static int __init init_syscall_regs(void) { - get_safe_registers(syscall_regs); + get_safe_registers(syscall_regs, NULL); syscall_regs[REGS_IP_INDEX] = STUB_CODE + ((unsigned long) &batch_syscall_stub - (unsigned long) &__syscall_stub_start); diff 
--git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d6e0a2234b8..dee0e8cf8ad 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -373,6 +373,9 @@ void userspace(struct uml_pt_regs *regs) if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) fatal_sigsegv(); + if (put_fp_registers(pid, regs->fp)) + fatal_sigsegv(); + /* Now we set local_using_sysemu to be used for one loop */ local_using_sysemu = get_using_sysemu(); @@ -399,6 +402,12 @@ void userspace(struct uml_pt_regs *regs) fatal_sigsegv(); } + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "userspace - get_fp_registers failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (WIFSTOPPED(status)) { @@ -457,10 +466,11 @@ void userspace(struct uml_pt_regs *regs) } static unsigned long thread_regs[MAX_REG_NR]; +static unsigned long thread_fp_regs[FP_SIZE]; static int __init init_thread_regs(void) { - get_safe_registers(thread_regs); + get_safe_registers(thread_regs, thread_fp_regs); /* Set parent's instruction pointer to start of clone-stub */ thread_regs[REGS_IP_INDEX] = STUB_CODE + (unsigned long) stub_clone_handler - @@ -503,6 +513,13 @@ int copy_context_skas0(unsigned long new_stack, int pid) return err; } + err = put_fp_registers(pid, thread_fp_regs); + if (err < 0) { + printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers " + "failed, pid = %d, err = %d\n", pid, err); + return err; + } + /* set a well known return code for detection of child write failure */ child_data->err = 12345678; diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/um/sys-i386/asm/ptrace.h index 0273e4d09af..5d2a5911253 100644 --- a/arch/um/sys-i386/asm/ptrace.h +++ b/arch/um/sys-i386/asm/ptrace.h @@ -42,11 +42,6 @@ */ struct user_desc; -extern int get_fpxregs(struct user_fxsr_struct __user *buf, - struct task_struct *child); -extern int set_fpxregs(struct user_fxsr_struct __user *buf, - struct 
task_struct *tsk); - extern int ptrace_get_thread_area(struct task_struct *child, int idx, struct user_desc __user *user_desc); diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index d23b2d3ea38..3375c271785 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -145,7 +145,7 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long __user *) data); } -int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int err, n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_i387_struct fpregs; @@ -161,7 +161,7 @@ int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return n; } -int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_i387_struct fpregs; @@ -174,7 +174,7 @@ int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) (unsigned long *) &fpregs); } -int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { int err, n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_fxsr_struct fpregs; @@ -190,7 +190,7 @@ int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) return n; } -int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { int n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_fxsr_struct fpregs; @@ -206,5 +206,23 @@ int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) long subarch_ptrace(struct task_struct *child, long request, 
unsigned long addr, unsigned long data) { - return -EIO; + int ret = -EIO; + void __user *datap = (void __user *) data; + switch (request) { + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + ret = get_fpregs(datap, child); + break; + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + ret = set_fpregs(datap, child); + break; + case PTRACE_GETFPXREGS: /* Get the child FPU state. */ + ret = get_fpxregs(datap, child); + break; + case PTRACE_SETFPXREGS: /* Set the child FPU state. */ + ret = set_fpxregs(datap, child); + break; + default: + ret = -EIO; + } + return ret; } diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/um/sys-i386/shared/sysdep/ptrace.h index d50e62e0707..c398a507611 100644 --- a/arch/um/sys-i386/shared/sysdep/ptrace.h +++ b/arch/um/sys-i386/shared/sysdep/ptrace.h @@ -53,6 +53,7 @@ extern int sysemu_supported; struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FPX_SIZE]; struct faultinfo faultinfo; long syscall; int is_user; diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index f43613643cd..4005506834f 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -145,7 +145,7 @@ int is_syscall(unsigned long addr) return instr == 0x050f; } -int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int err, n, cpu = ((struct thread_info *) child->stack)->cpu; long fpregs[HOST_FP_SIZE]; @@ -162,7 +162,7 @@ int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return n; } -int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int n, cpu = ((struct thread_info *) child->stack)->cpu; long fpregs[HOST_FP_SIZE]; @@ -182,12 +182,16 @@ long subarch_ptrace(struct task_struct *child, long request, void __user *datap = (void __user *) 
data; switch (request) { - case PTRACE_GETFPXREGS: /* Get the child FPU state. */ + case PTRACE_GETFPREGS: /* Get the child FPU state. */ ret = get_fpregs(datap, child); break; - case PTRACE_SETFPXREGS: /* Set the child FPU state. */ + case PTRACE_SETFPREGS: /* Set the child FPU state. */ ret = set_fpregs(datap, child); break; + case PTRACE_ARCH_PRCTL: + /* XXX Calls ptrace on the host - needs some SMP thinking */ + ret = arch_prctl(child, data, (void __user *) addr); + break; } return ret; diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/um/sys-x86_64/shared/sysdep/ptrace.h index fdba5457947..8ee8f8e12af 100644 --- a/arch/um/sys-x86_64/shared/sysdep/ptrace.h +++ b/arch/um/sys-x86_64/shared/sysdep/ptrace.h @@ -85,6 +85,7 @@ struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FP_SIZE]; struct faultinfo faultinfo; long syscall; int is_user; diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 4554cc6fb96..091508b533b 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -16,7 +16,6 @@ #endif .macro altinstruction_entry orig alt feature orig_len alt_len - .align 8 .long \orig - . .long \alt - . 
.word \feature diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 23fb6d79f20..37ad100a221 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -48,9 +48,6 @@ struct alt_instr { u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction, <= instrlen */ -#ifdef CONFIG_X86_64 - u32 pad2; -#endif }; extern void alternative_instructions(void); @@ -83,7 +80,6 @@ static inline int alternatives_text_reserved(void *start, void *end) \ "661:\n\t" oldinstr "\n662:\n" \ ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ " .long 661b - .\n" /* label */ \ " .long 663f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4258aac99a6..88b23a43f34 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -332,7 +332,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) asm goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" - _ASM_ALIGN "\n" " .long 1b - .\n" " .long 0\n" /* no replacement */ " .word %P0\n" /* feature bit */ @@ -350,7 +349,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) asm volatile("1: movb $0,%0\n" "2:\n" ".section .altinstructions,\"a\"\n" - _ASM_ALIGN "\n" " .long 1b - .\n" " .long 3f - .\n" " .word %P1\n" /* feature bit */ diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 3f2ad2640d8..ccdbc16b894 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -42,8 +42,11 @@ int mach_set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + unsigned long flags; int retval = 0; + spin_lock_irqsave(&rtc_lock, flags); + /* tell the clock it's being set */ save_control = CMOS_READ(RTC_CONTROL); 
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -93,12 +96,17 @@ int mach_set_rtc_mmss(unsigned long nowtime) CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock_irqrestore(&rtc_lock, flags); + return retval; } unsigned long mach_get_cmos_time(void) { unsigned int status, year, mon, day, hour, min, sec, century = 0; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); /* * If UIP is clear, then we have >= 244 microseconds before @@ -125,6 +133,8 @@ unsigned long mach_get_cmos_time(void) status = CMOS_READ(RTC_CONTROL); WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); + spin_unlock_irqrestore(&rtc_lock, flags); + if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { sec = bcd2bin(sec); min = bcd2bin(min); @@ -169,24 +179,15 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - unsigned long flags; - int retval; - - spin_lock_irqsave(&rtc_lock, flags); - retval = x86_platform.set_wallclock(now.tv_sec); - spin_unlock_irqrestore(&rtc_lock, flags); - - return retval; + return x86_platform.set_wallclock(now.tv_sec); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval, flags; + unsigned long retval; - spin_lock_irqsave(&rtc_lock, flags); retval = x86_platform.get_wallclock(); - spin_unlock_irqrestore(&rtc_lock, flags); ts->tv_sec = retval; ts->tv_nsec = 0; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 18ae83dd1cd..b56c65de384 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -56,7 +56,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), }; -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; +static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; static int __init vsyscall_setup(char *str) { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 
6f08bc940fa..8b4cc5f067d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3603,7 +3603,7 @@ done_prefixes: break; case Src2CL: ctxt->src2.bytes = 1; - ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0x8; + ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff; break; case Src2ImmByte: rc = decode_imm(ctxt, &ctxt->src2, 1, true); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1c5b69373a0..8e8da7960db 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -400,7 +400,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* xchg acts as a barrier before the setting of the high bits */ orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); - orig.spte_high = ssptep->spte_high = sspte.spte_high; + orig.spte_high = ssptep->spte_high; + ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); return orig.spte; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 30326443ab8..87488b93a65 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -63,9 +63,8 @@ static void __init find_early_table_space(unsigned long end, int use_pse, #ifdef CONFIG_X86_32 /* for fixmap */ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); - - good_end = max_pfn_mapped << PAGE_SHIFT; #endif + good_end = max_pfn_mapped << PAGE_SHIFT; base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); if (base == MEMBLOCK_ERROR) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 039d91315bc..404f21a3ff9 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -43,6 +43,17 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */ + /* 2006 AMD HT/VIA system with two host bridges */ + { + .callback = set_use_crs, + .ident = "ASUS M2V-MX SE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"), + 
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + }, {} }; diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 58425adc22c..fe73276e026 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -678,38 +678,40 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) pentry = (struct sfi_device_table_entry *)sb->pentry; for (i = 0; i < num; i++, pentry++) { - if (pentry->irq != (u8)0xff) { /* native RTE case */ + int irq = pentry->irq; + + if (irq != (u8)0xff) { /* native RTE case */ /* these SPI2 devices are not exposed to system as PCI * devices, but they have separate RTE entry in IOAPIC * so we have to enable them one by one here */ - ioapic = mp_find_ioapic(pentry->irq); + ioapic = mp_find_ioapic(irq); irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = pentry->irq; + irq_attr.ioapic_pin = irq; irq_attr.trigger = 1; irq_attr.polarity = 1; - io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); + io_apic_set_pci_routing(NULL, irq, &irq_attr); } else - pentry->irq = 0; /* No irq */ + irq = 0; /* No irq */ switch (pentry->type) { case SFI_DEV_TYPE_IPC: /* ID as IRQ is a hack that will go away */ - pdev = platform_device_alloc(pentry->name, pentry->irq); + pdev = platform_device_alloc(pentry->name, irq); if (pdev == NULL) { pr_err("out of memory for SFI platform device '%s'.\n", pentry->name); continue; } - install_irq_resource(pdev, pentry->irq); + install_irq_resource(pdev, irq); pr_debug("info[%2d]: IPC bus, name = %16.16s, " - "irq = 0x%2x\n", i, pentry->name, pentry->irq); + "irq = 0x%2x\n", i, pentry->name, irq); sfi_handle_ipc_dev(pdev); break; case SFI_DEV_TYPE_SPI: memset(&spi_info, 0, sizeof(spi_info)); strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); - spi_info.irq = pentry->irq; + spi_info.irq = irq; spi_info.bus_num = pentry->host_num; spi_info.chip_select = pentry->addr; spi_info.max_speed_hz = pentry->max_freq; @@ -726,7 +728,7 @@ static int __init 
sfi_parse_devs(struct sfi_table_header *table) memset(&i2c_info, 0, sizeof(i2c_info)); bus = pentry->host_num; strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); - i2c_info.irq = pentry->irq; + i2c_info.irq = irq; i2c_info.addr = pentry->addr; pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " "irq = 0x%2x, addr = 0x%x\n", i, bus, diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index 73d70d65e76..6d5dbcdd444 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c @@ -58,8 +58,11 @@ EXPORT_SYMBOL_GPL(vrtc_cmos_write); unsigned long vrtc_get_time(void) { u8 sec, min, hour, mday, mon; + unsigned long flags; u32 year; + spin_lock_irqsave(&rtc_lock, flags); + while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) cpu_relax(); @@ -70,6 +73,8 @@ unsigned long vrtc_get_time(void) mon = vrtc_cmos_read(RTC_MONTH); year = vrtc_cmos_read(RTC_YEAR); + spin_unlock_irqrestore(&rtc_lock, flags); + /* vRTC YEAR reg contains the offset to 1960 */ year += 1960; @@ -83,8 +88,10 @@ unsigned long vrtc_get_time(void) int vrtc_set_mmss(unsigned long nowtime) { int real_sec, real_min; + unsigned long flags; int vrtc_min; + spin_lock_irqsave(&rtc_lock, flags); vrtc_min = vrtc_cmos_read(RTC_MINUTES); real_sec = nowtime % 60; @@ -95,6 +102,8 @@ int vrtc_set_mmss(unsigned long nowtime) vrtc_cmos_write(real_sec, RTC_SECONDS); vrtc_cmos_write(real_min, RTC_MINUTES); + spin_unlock_irqrestore(&rtc_lock, flags); + return 0; } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 20a61427506..3dd53f997b1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1721,10 +1721,8 @@ void __init xen_setup_machphys_mapping(void) machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } #ifdef CONFIG_X86_32 - if ((machine_to_phys_mapping + machine_to_phys_nr) - < machine_to_phys_mapping) - machine_to_phys_nr = (unsigned long *)NULL - - machine_to_phys_mapping; + WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) + < machine_to_phys_mapping); #endif } 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c3b8d440873..46d6d21dbdb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -306,10 +306,12 @@ char * __init xen_memory_setup(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); extra_limit = xen_get_max_pages(); - if (extra_limit >= max_pfn) - extra_pages = extra_limit - max_pfn; - else - extra_pages = 0; + if (max_pfn + extra_pages > extra_limit) { + if (extra_limit > max_pfn) + extra_pages = extra_limit - max_pfn; + else + extra_pages = 0; + } extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d4fc6d454f8..041d4fe9dfe 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -532,7 +532,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) WARN_ON(xen_smp_intr_init(0)); xen_init_lock_cpu(0); - xen_init_spinlocks(); } static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5158c505bef..163b4679556 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void) struct pvclock_vcpu_time_info *src; cycle_t ret; - src = &get_cpu_var(xen_vcpu)->time; + preempt_disable_notrace(); + src = &__get_cpu_var(xen_vcpu)->time; ret = pvclock_clocksource_read(src); - put_cpu_var(xen_vcpu); + preempt_enable_notrace(); return ret; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index bcaf16ee6ad..b596e54ddd7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -785,10 +785,10 @@ static int blkio_policy_parse_and_set(char *buf, { char *s[4], *p, *major_s = NULL, *minor_s = NULL; int ret; - unsigned long major, minor, temp; + unsigned long major, minor; int i = 0; dev_t dev; - u64 bps, iops; + u64 temp; memset(s, 0, sizeof(s)); @@ -826,20 +826,23 @@ static int blkio_policy_parse_and_set(char *buf, dev = MKDEV(major, minor); - ret = blkio_check_dev_num(dev); + ret = 
strict_strtoull(s[1], 10, &temp); if (ret) - return ret; + return -EINVAL; - newpn->dev = dev; + /* For rule removal, do not check for device presence. */ + if (temp) { + ret = blkio_check_dev_num(dev); + if (ret) + return ret; + } - if (s[1] == NULL) - return -EINVAL; + newpn->dev = dev; switch (plid) { case BLKIO_POLICY_PROP: - ret = strict_strtoul(s[1], 10, &temp); - if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || - temp > BLKIO_WEIGHT_MAX) + if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || + temp > BLKIO_WEIGHT_MAX) return -EINVAL; newpn->plid = plid; @@ -850,26 +853,18 @@ static int blkio_policy_parse_and_set(char *buf, switch(fileid) { case BLKIO_THROTL_read_bps_device: case BLKIO_THROTL_write_bps_device: - ret = strict_strtoull(s[1], 10, &bps); - if (ret) - return -EINVAL; - newpn->plid = plid; newpn->fileid = fileid; - newpn->val.bps = bps; + newpn->val.bps = temp; break; case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: - ret = strict_strtoull(s[1], 10, &iops); - if (ret) - return -EINVAL; - - if (iops > THROTL_IOPS_MAX) + if (temp > THROTL_IOPS_MAX) return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; - newpn->val.iops = (unsigned int)iops; + newpn->val.iops = (unsigned int)temp; break; } break; diff --git a/block/blk-core.c b/block/blk-core.c index 90e1ffdeb41..d34433ae791 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -348,9 +348,10 @@ void blk_put_queue(struct request_queue *q) EXPORT_SYMBOL(blk_put_queue); /* - * Note: If a driver supplied the queue lock, it should not zap that lock - * unexpectedly as some queue cleanup components like elevator_exit() and - * blk_throtl_exit() need queue lock. + * Note: If a driver supplied the queue lock, it is disconnected + * by this function. The actual state of the lock doesn't matter + * here as the request_queue isn't accessible after this point + * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. 
*/ void blk_cleanup_queue(struct request_queue *q) { @@ -367,10 +368,8 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); - if (q->elevator) - elevator_exit(q->elevator); - - blk_throtl_exit(q); + if (q->queue_lock != &q->__queue_lock) + q->queue_lock = &q->__queue_lock; blk_put_queue(q); } @@ -1167,7 +1166,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, * true if merge was successful, otherwise false. */ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, - struct bio *bio) + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; @@ -1176,10 +1175,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, plug = tsk->plug; if (!plug) goto out; + *request_count = 0; list_for_each_entry_reverse(rq, &plug->list, queuelist) { int el_ret; + (*request_count)++; + if (rq->q != q) continue; @@ -1219,6 +1221,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) struct blk_plug *plug; int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; struct request *req; + unsigned int request_count = 0; /* * low level driver can indicate that it wants pages above a @@ -1237,7 +1240,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. 
*/ - if (attempt_plug_merge(current, q, bio)) + if (attempt_plug_merge(current, q, bio, &request_count)) goto out; spin_lock_irq(q->queue_lock); @@ -1302,11 +1305,10 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } + if (request_count >= BLK_MAX_REQUEST_COUNT) + blk_flush_plug_list(plug, false); list_add_tail(&req->queuelist, &plug->list); - plug->count++; drive_stat_acct(req, 1); - if (plug->count >= BLK_MAX_REQUEST_COUNT) - blk_flush_plug_list(plug, false); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); @@ -2634,7 +2636,6 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; - plug->count = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2718,7 +2719,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; list_splice_init(&plug->list, &list); - plug->count = 0; if (plug->should_sort) { list_sort(NULL, &list, plug_rq_cmp); diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 58340d0cb23..1366a89d8e6 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -115,7 +115,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { + if (req->cpu != -1) { ccpu = req->cpu; if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { ccpu = blk_cpu_to_group(ccpu); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0ee17b5e7fb..60fda88c57f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ret = queue_var_store(&val, page, count); spin_lock_irq(q->queue_lock); - if (val) { + if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - if (val == 2) - queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); - } else { + queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 1) { + 
queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 0) { queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } @@ -477,6 +479,11 @@ static void blk_release_queue(struct kobject *kobj) blk_sync_queue(q); + if (q->elevator) + elevator_exit(q->elevator); + + blk_throtl_exit(q); + if (rl->rq_pool) mempool_destroy(rl->rq_pool); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a33bd4377c6..16ace89613b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,8 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; - /* pending metadata requests */ - int meta_pending; + /* pending priority requests */ + int prio_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -684,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; - if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) - return rq1->cmd_flags & REQ_META ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) + return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1612,9 +1612,9 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq->cmd_flags & REQ_META) { - WARN_ON(!cfqq->meta_pending); - cfqq->meta_pending--; + if (rq->cmd_flags & REQ_PRIO) { + WARN_ON(!cfqq->prio_pending); + cfqq->prio_pending--; } } @@ -3372,7 +3372,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. 
*/ - if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) return true; /* @@ -3439,8 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq->cmd_flags & REQ_META) - cfqq->meta_pending++; + if (rq->cmd_flags & REQ_PRIO) + cfqq->prio_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c index be442561693..7835b8fc94d 100644 --- a/crypto/ghash-generic.c +++ b/crypto/ghash-generic.c @@ -67,6 +67,9 @@ static int ghash_update(struct shash_desc *desc, struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); u8 *dst = dctx->buffer; + if (!ctx->gf128) + return -ENOKEY; + if (dctx->bytes) { int n = min(srclen, dctx->bytes); u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); @@ -119,6 +122,9 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); u8 *buf = dctx->buffer; + if (!ctx->gf128) + return -ENOKEY; + ghash_flush(ctx, dctx); memcpy(dst, buf, GHASH_BLOCK_SIZE); diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index bc533dde16c..f895a244ca7 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -121,7 +121,7 @@ /* Maximum sleep allowed via Sleep() operator */ -#define ACPI_MAX_SLEEP 20000 /* Two seconds */ +#define ACPI_MAX_SLEEP 2000 /* Two seconds */ /****************************************************************************** * diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index c34aa51af4e..e3f47872ec2 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -13,6 +13,7 @@ config ACPI_APEI_GHES bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select IRQ_WORK select LLIST select GENERIC_ALLOCATOR help diff --git 
a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 8041248fce9..61540360d5c 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -618,7 +618,7 @@ int apei_osc_setup(void) }; capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = 1; capbuf[OSC_CONTROL_TYPE] = 0; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 2c18d584066..b97294e2d95 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -42,6 +42,22 @@ static struct pm_clk_data *__to_pcd(struct device *dev) } /** + * pm_clk_acquire - Acquire a device clock. + * @dev: Device whose clock is to be acquired. + * @ce: PM clock entry corresponding to the clock. + */ +static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce) +{ + ce->clk = clk_get(dev, ce->con_id); + if (IS_ERR(ce->clk)) { + ce->status = PCE_STATUS_ERROR; + } else { + ce->status = PCE_STATUS_ACQUIRED; + dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id); + } +} + +/** * pm_clk_add - Start using a device clock for power management. * @dev: Device whose clock is going to be used for power management. * @con_id: Connection ID of the clock. @@ -73,6 +89,8 @@ int pm_clk_add(struct device *dev, const char *con_id) } } + pm_clk_acquire(dev, ce); + spin_lock_irq(&pcd->lock); list_add_tail(&ce->node, &pcd->clock_list); spin_unlock_irq(&pcd->lock); @@ -82,17 +100,12 @@ int pm_clk_add(struct device *dev, const char *con_id) /** * __pm_clk_remove - Destroy PM clock entry. * @ce: PM clock entry to destroy. - * - * This routine must be called under the spinlock protecting the PM list of - * clocks corresponding the the @ce's device. 
*/ static void __pm_clk_remove(struct pm_clock_entry *ce) { if (!ce) return; - list_del(&ce->node); - if (ce->status < PCE_STATUS_ERROR) { if (ce->status == PCE_STATUS_ENABLED) clk_disable(ce->clk); @@ -126,18 +139,22 @@ void pm_clk_remove(struct device *dev, const char *con_id) spin_lock_irq(&pcd->lock); list_for_each_entry(ce, &pcd->clock_list, node) { - if (!con_id && !ce->con_id) { - __pm_clk_remove(ce); - break; - } else if (!con_id || !ce->con_id) { + if (!con_id && !ce->con_id) + goto remove; + else if (!con_id || !ce->con_id) continue; - } else if (!strcmp(con_id, ce->con_id)) { - __pm_clk_remove(ce); - break; - } + else if (!strcmp(con_id, ce->con_id)) + goto remove; } spin_unlock_irq(&pcd->lock); + return; + + remove: + list_del(&ce->node); + spin_unlock_irq(&pcd->lock); + + __pm_clk_remove(ce); } /** @@ -175,20 +192,27 @@ void pm_clk_destroy(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce, *c; + struct list_head list; if (!pcd) return; dev->power.subsys_data = NULL; + INIT_LIST_HEAD(&list); spin_lock_irq(&pcd->lock); list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node) - __pm_clk_remove(ce); + list_move(&ce->node, &list); spin_unlock_irq(&pcd->lock); kfree(pcd); + + list_for_each_entry_safe_reverse(ce, c, &list, node) { + list_del(&ce->node); + __pm_clk_remove(ce); + } } #endif /* CONFIG_PM */ @@ -196,23 +220,6 @@ void pm_clk_destroy(struct device *dev) #ifdef CONFIG_PM_RUNTIME /** - * pm_clk_acquire - Acquire a device clock. - * @dev: Device whose clock is to be acquired. - * @con_id: Connection ID of the clock. - */ -static void pm_clk_acquire(struct device *dev, - struct pm_clock_entry *ce) -{ - ce->clk = clk_get(dev, ce->con_id); - if (IS_ERR(ce->clk)) { - ce->status = PCE_STATUS_ERROR; - } else { - ce->status = PCE_STATUS_ACQUIRED; - dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id); - } -} - -/** * pm_clk_suspend - Disable clocks in a device's PM clock list. 
* @dev: Device to disable the clocks for. */ @@ -230,9 +237,6 @@ int pm_clk_suspend(struct device *dev) spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry_reverse(ce, &pcd->clock_list, node) { - if (ce->status == PCE_STATUS_NONE) - pm_clk_acquire(dev, ce); - if (ce->status < PCE_STATUS_ERROR) { clk_disable(ce->clk); ce->status = PCE_STATUS_ACQUIRED; @@ -262,9 +266,6 @@ int pm_clk_resume(struct device *dev) spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry(ce, &pcd->clock_list, node) { - if (ce->status == PCE_STATUS_NONE) - pm_clk_acquire(dev, ce); - if (ce->status < PCE_STATUS_ERROR) { clk_enable(ce->clk); ce->status = PCE_STATUS_ENABLED; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 98de8f41867..9955a53733b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4250,7 +4250,7 @@ static int __init floppy_init(void) use_virtual_dma = can_use_virtual_dma & 1; fdc_state[0].address = FDC1; if (fdc_state[0].address == -1) { - del_timer(&fd_timeout); + del_timer_sync(&fd_timeout); err = -ENODEV; goto out_unreg_region; } @@ -4261,7 +4261,7 @@ static int __init floppy_init(void) fdc = 0; /* reset fdc in case of unexpected interrupt */ err = floppy_grab_irq_and_dma(); if (err) { - del_timer(&fd_timeout); + del_timer_sync(&fd_timeout); err = -EBUSY; goto out_unreg_region; } @@ -4318,7 +4318,7 @@ static int __init floppy_init(void) user_reset_fdc(-1, FD_RESET_ALWAYS, false); } fdc = 0; - del_timer(&fd_timeout); + del_timer_sync(&fd_timeout); current_drive = 0; initialized = true; if (have_no_fdc) { @@ -4368,7 +4368,7 @@ out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: while (dr--) { - del_timer(&motor_off_timer[dr]); + del_timer_sync(&motor_off_timer[dr]); if (disks[dr]->queue) blk_cleanup_queue(disks[dr]->queue); put_disk(disks[dr]); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9e40b283a46..00c57c90e2d 100644 --- a/drivers/block/xen-blkback/common.h +++ 
b/drivers/block/xen-blkback/common.h @@ -46,7 +46,7 @@ #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ - pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3f129b45451..5fd2010f7d2 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -590,7 +590,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. - * blkif_disconnect() is idempotent. + * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -601,17 +601,17 @@ static void frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: + xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; /* fall through if not online */ case XenbusStateUnknown: - /* implies blkif_disconnect() via blkback_remove() */ + /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3ef476070ba..9cbac6b445e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -72,9 +72,15 @@ static struct usb_device_id btusb_table[] = { /* Apple MacBookAir3,1, MacBookAir3,2 */ { USB_DEVICE(0x05ac, 0x821b) }, + /* Apple MacBookAir4,1 */ + { USB_DEVICE(0x05ac, 0x821f) }, + /* Apple MacBookPro8,2 */ { USB_DEVICE(0x05ac, 0x821a) }, + /* Apple MacMini5,1 */ + { USB_DEVICE(0x05ac, 0x8281) }, + /* AVM BlueFRITZ! 
USB v2.0 */ { USB_DEVICE(0x057c, 0x3800) }, diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index 65d27aff553..04d353f58d7 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -125,6 +125,13 @@ static long st_receive(void *priv_data, struct sk_buff *skb) /* protocol structure registered with shared transport */ static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = { { + .chnl_id = HCI_EVENT_PKT, /* HCI Events */ + .hdr_len = sizeof(struct hci_event_hdr), + .offset_len_in_hdr = offsetof(struct hci_event_hdr, plen), + .len_size = 1, /* sizeof(plen) in struct hci_event_hdr */ + .reserve = 8, + }, + { .chnl_id = HCI_ACLDATA_PKT, /* ACL */ .hdr_len = sizeof(struct hci_acl_hdr), .offset_len_in_hdr = offsetof(struct hci_acl_hdr, dlen), @@ -138,13 +145,6 @@ static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = { .len_size = 1, /* sizeof(dlen) in struct hci_sco_hdr */ .reserve = 8, }, - { - .chnl_id = HCI_EVENT_PKT, /* HCI Events */ - .hdr_len = sizeof(struct hci_event_hdr), - .offset_len_in_hdr = offsetof(struct hci_event_hdr, plen), - .len_size = 1, /* sizeof(plen) in struct hci_event_hdr */ - .reserve = 8, - }, }; /* Called from HCI core to initialize the device */ @@ -240,7 +240,7 @@ static int ti_st_close(struct hci_dev *hdev) if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags)) return 0; - for (i = 0; i < MAX_BT_CHNL_IDS; i++) { + for (i = MAX_BT_CHNL_IDS-1; i >= 0; i--) { err = st_unregister(&ti_st_proto[i]); if (err) BT_ERR("st_unregister(%d) failed with error %d", diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index f6595aba4f0..fa567f1158c 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -43,6 +43,7 @@ config TCG_NSC config TCG_ATMEL tristate "Atmel TPM Interface" + depends on PPC64 || HAS_IOPORT ---help--- If you have a TPM security chip from Atmel say Yes and it will be accessible from within Linux. 
To compile this driver diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index caf8012ef47..9ca5c021d0b 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -383,6 +383,9 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, u32 count, ordinal; unsigned long stop; + if (bufsiz > TPM_BUFSIZE) + bufsiz = TPM_BUFSIZE; + count = be32_to_cpu(*((__be32 *) (buf + 2))); ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); if (count == 0) @@ -1102,6 +1105,7 @@ ssize_t tpm_read(struct file *file, char __user *buf, { struct tpm_chip *chip = file->private_data; ssize_t ret_size; + int rc; del_singleshot_timer_sync(&chip->user_read_timer); flush_work_sync(&chip->work); @@ -1112,8 +1116,11 @@ ssize_t tpm_read(struct file *file, char __user *buf, ret_size = size; mutex_lock(&chip->buffer_mutex); - if (copy_to_user(buf, chip->data_buffer, ret_size)) + rc = copy_to_user(buf, chip->data_buffer, ret_size); + memset(chip->data_buffer, 0, ret_size); + if (rc) ret_size = -EFAULT; + mutex_unlock(&chip->buffer_mutex); } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 82facc9104c..4d2464871ad 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -396,8 +396,6 @@ static void __exit cleanup_nsc(void) if (pdev) { tpm_nsc_remove(&pdev->dev); platform_device_unregister(pdev); - kfree(pdev); - pdev = NULL; } platform_driver_unregister(&nsc_drv); diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 7b0603eb012..cdc02ac8f41 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -261,6 +261,9 @@ static int pcc_get_offset(int cpu) pr = per_cpu(processors, cpu); pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); + if (!pr) + return -ENODEV; + status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); if (ACPI_FAILURE(status)) return -ENODEV; diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 57cd3a406ed..fd7170a9ad2 100644 --- 
a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -290,6 +290,9 @@ static const struct { {PCI_VENDOR_ID_NEC, PCI_ANY_ID, PCI_ANY_ID, QUIRK_CYCLE_TIMER}, + {PCI_VENDOR_ID_O2, PCI_ANY_ID, PCI_ANY_ID, + QUIRK_NO_MSI}, + {PCI_VENDOR_ID_RICOH, PCI_ANY_ID, PCI_ANY_ID, QUIRK_CYCLE_TIMER}, diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c index 231714def4d..4e24436b0f8 100644 --- a/drivers/gpio/gpio-generic.c +++ b/drivers/gpio/gpio-generic.c @@ -351,7 +351,7 @@ static int bgpio_setup_direction(struct bgpio_chip *bgc, return 0; } -int __devexit bgpio_remove(struct bgpio_chip *bgc) +int bgpio_remove(struct bgpio_chip *bgc) { int err = gpiochip_remove(&bgc->gc); @@ -361,15 +361,10 @@ int __devexit bgpio_remove(struct bgpio_chip *bgc) } EXPORT_SYMBOL_GPL(bgpio_remove); -int __devinit bgpio_init(struct bgpio_chip *bgc, - struct device *dev, - unsigned long sz, - void __iomem *dat, - void __iomem *set, - void __iomem *clr, - void __iomem *dirout, - void __iomem *dirin, - bool big_endian) +int bgpio_init(struct bgpio_chip *bgc, struct device *dev, + unsigned long sz, void __iomem *dat, void __iomem *set, + void __iomem *clr, void __iomem *dirout, void __iomem *dirin, + bool big_endian) { int ret; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 0599854e221..118ec12d2d5 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -34,8 +34,8 @@ struct gpio_bank { u16 irq; u16 virtual_irq_start; int method; -#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS) u32 suspend_wakeup; +#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS) u32 saved_wakeup; #endif u32 non_wakeup_gpios; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index c43b8ff626a..0550dcb8581 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -577,6 +577,7 @@ pca953x_get_alt_pdata(struct i2c_client *client, int *gpio_base, int *invert) void pca953x_get_alt_pdata(struct 
i2c_client *client, int *gpio_base, int *invert) { + *gpio_base = -1; } #endif diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ce045a8cf82..f07e4252b70 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -67,11 +67,11 @@ module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); MODULE_PARM_DESC(i915_enable_rc6, "Enable power-saving render C-state 6 (default: true)"); -unsigned int i915_enable_fbc __read_mostly = 1; +unsigned int i915_enable_fbc __read_mostly = -1; module_param_named(i915_enable_fbc, i915_enable_fbc, int, 0600); MODULE_PARM_DESC(i915_enable_fbc, "Enable frame buffer compression for power savings " - "(default: false)"); + "(default: -1 (use per-chip default))"); unsigned int i915_lvds_downclock __read_mostly = 0; module_param_named(lvds_downclock, i915_lvds_downclock, int, 0400); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56a8554d903..04411ad2e77 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1799,6 +1799,7 @@ static void intel_update_fbc(struct drm_device *dev) struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; struct drm_i915_gem_object *obj; + int enable_fbc; DRM_DEBUG_KMS("\n"); @@ -1839,8 +1840,15 @@ static void intel_update_fbc(struct drm_device *dev) intel_fb = to_intel_framebuffer(fb); obj = intel_fb->obj; - if (!i915_enable_fbc) { - DRM_DEBUG_KMS("fbc disabled per module param (default off)\n"); + enable_fbc = i915_enable_fbc; + if (enable_fbc < 0) { + DRM_DEBUG_KMS("fbc set to per-chip default\n"); + enable_fbc = 1; + if (INTEL_INFO(dev)->gen <= 5) + enable_fbc = 0; + } + if (!enable_fbc) { + DRM_DEBUG_KMS("fbc disabled per module param\n"); dev_priv->no_fbc_reason = FBC_MODULE_PARAM; goto out_disable; } @@ -4687,13 +4695,13 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, bpc = 6; /* min is 18bpp */ break; case 24: - bpc = 
min((unsigned int)8, display_bpc); + bpc = 8; break; case 30: - bpc = min((unsigned int)10, display_bpc); + bpc = 10; break; case 48: - bpc = min((unsigned int)12, display_bpc); + bpc = 12; break; default: DRM_DEBUG("unsupported depth, assuming 24 bits\n"); @@ -4701,10 +4709,12 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, break; } + display_bpc = min(display_bpc, bpc); + DRM_DEBUG_DRIVER("setting pipe bpc to %d (max display bpc %d)\n", bpc, display_bpc); - *pipe_bpp = bpc * 3; + *pipe_bpp = display_bpc * 3; return display_bpc != bpc; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0b2ee9d3998..fe1099d8817 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -337,9 +337,6 @@ extern void intel_release_load_detect_pipe(struct intel_encoder *intel_encoder, struct drm_connector *connector, struct intel_load_detect_pipe *old); -extern struct drm_connector* intel_sdvo_find(struct drm_device *dev, int sdvoB); -extern int intel_sdvo_supports_hotplug(struct drm_connector *connector); -extern void intel_sdvo_set_hotplug(struct drm_connector *connector, int enable); extern void intelfb_restore(void); extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 30fe554d893..6348c499616 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -92,6 +92,11 @@ struct intel_sdvo { */ uint16_t attached_output; + /* + * Hotplug activation bits for this device + */ + uint8_t hotplug_active[2]; + /** * This is used to select the color range of RBG outputs in HDMI mode. * It is only valid when using TMDS encoding and 8 bit per color mode. @@ -1208,74 +1213,20 @@ static bool intel_sdvo_get_capabilities(struct intel_sdvo *intel_sdvo, struct in return true; } -/* No use! 
*/ -#if 0 -struct drm_connector* intel_sdvo_find(struct drm_device *dev, int sdvoB) -{ - struct drm_connector *connector = NULL; - struct intel_sdvo *iout = NULL; - struct intel_sdvo *sdvo; - - /* find the sdvo connector */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - iout = to_intel_sdvo(connector); - - if (iout->type != INTEL_OUTPUT_SDVO) - continue; - - sdvo = iout->dev_priv; - - if (sdvo->sdvo_reg == SDVOB && sdvoB) - return connector; - - if (sdvo->sdvo_reg == SDVOC && !sdvoB) - return connector; - - } - - return NULL; -} - -int intel_sdvo_supports_hotplug(struct drm_connector *connector) +static int intel_sdvo_supports_hotplug(struct intel_sdvo *intel_sdvo) { u8 response[2]; - u8 status; - struct intel_sdvo *intel_sdvo; - DRM_DEBUG_KMS("\n"); - - if (!connector) - return 0; - - intel_sdvo = to_intel_sdvo(connector); return intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HOT_PLUG_SUPPORT, &response, 2) && response[0]; } -void intel_sdvo_set_hotplug(struct drm_connector *connector, int on) +static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) { - u8 response[2]; - u8 status; - struct intel_sdvo *intel_sdvo = to_intel_sdvo(connector); - - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0); - intel_sdvo_read_response(intel_sdvo, &response, 2); - - if (on) { - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_GET_HOT_PLUG_SUPPORT, NULL, 0); - status = intel_sdvo_read_response(intel_sdvo, &response, 2); - - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2); - } else { - response[0] = 0; - response[1] = 0; - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2); - } + struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base); - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0); - intel_sdvo_read_response(intel_sdvo, &response, 2); + intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &intel_sdvo->hotplug_active, 2); } 
-#endif static bool intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo) @@ -2045,6 +1996,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) { struct drm_encoder *encoder = &intel_sdvo->base.base; struct drm_connector *connector; + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); struct intel_connector *intel_connector; struct intel_sdvo_connector *intel_sdvo_connector; @@ -2062,7 +2014,17 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) intel_connector = &intel_sdvo_connector->base; connector = &intel_connector->base; - connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + if (intel_sdvo_supports_hotplug(intel_sdvo) & (1 << device)) { + connector->polled = DRM_CONNECTOR_POLL_HPD; + intel_sdvo->hotplug_active[0] |= 1 << device; + /* Some SDVO devices have one-shot hotplug interrupts. + * Ensure that they get re-enabled when an interrupt happens. + */ + intel_encoder->hot_plug = intel_sdvo_enable_hotplug; + intel_sdvo_enable_hotplug(intel_encoder); + } + else + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; encoder->encoder_type = DRM_MODE_ENCODER_TMDS; connector->connector_type = DRM_MODE_CONNECTOR_DVID; @@ -2569,6 +2531,14 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg) if (!intel_sdvo_get_capabilities(intel_sdvo, &intel_sdvo->caps)) goto err; + /* Set up hotplug command - note paranoia about contents of reply. + * We assume that the hardware is in a sane state, and only touch + * the bits we think we understand. 
+ */ + intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, + &intel_sdvo->hotplug_active, 2); + intel_sdvo->hotplug_active[0] &= ~0x3; + if (intel_sdvo_output_setup(intel_sdvo, intel_sdvo->caps.output_flags) != true) { DRM_DEBUG_KMS("SDVO output failed to setup on SDVO%c\n", diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index e88c64417a8..14cc88aaf3a 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -277,7 +277,12 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, case ATOM_ARG_FB: idx = U8(*ptr); (*ptr)++; - val = gctx->scratch[((gctx->fb_base + idx) / 4)]; + if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) { + DRM_ERROR("ATOM: fb read beyond scratch region: %d vs. %d\n", + gctx->fb_base + (idx * 4), gctx->scratch_size_bytes); + val = 0; + } else + val = gctx->scratch[(gctx->fb_base / 4) + idx]; if (print) DEBUG("FB[0x%02X]", idx); break; @@ -531,7 +536,11 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, case ATOM_ARG_FB: idx = U8(*ptr); (*ptr)++; - gctx->scratch[((gctx->fb_base + idx) / 4)] = val; + if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) { + DRM_ERROR("ATOM: fb write beyond scratch region: %d vs. 
%d\n", + gctx->fb_base + (idx * 4), gctx->scratch_size_bytes); + } else + gctx->scratch[(gctx->fb_base / 4) + idx] = val; DEBUG("FB[0x%02X]", idx); break; case ATOM_ARG_PLL: @@ -1370,11 +1379,13 @@ int atom_allocate_fb_scratch(struct atom_context *ctx) usage_bytes = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb * 1024; } + ctx->scratch_size_bytes = 0; if (usage_bytes == 0) usage_bytes = 20 * 1024; /* allocate some scratch memory */ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL); if (!ctx->scratch) return -ENOMEM; + ctx->scratch_size_bytes = usage_bytes; return 0; } diff --git a/drivers/gpu/drm/radeon/atom.h b/drivers/gpu/drm/radeon/atom.h index a589a55b223..93cfe2086ba 100644 --- a/drivers/gpu/drm/radeon/atom.h +++ b/drivers/gpu/drm/radeon/atom.h @@ -137,6 +137,7 @@ struct atom_context { int cs_equal, cs_above; int io_mode; uint32_t *scratch; + int scratch_size_bytes; }; extern int atom_debug; diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index c742944d380..a515b2a09d8 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -466,7 +466,7 @@ static void atombios_crtc_program_ss(struct drm_crtc *crtc, return; } args.v2.ucEnable = enable; - if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK)) + if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE41(rdev)) args.v2.ucEnable = ATOM_DISABLE; } else if (ASIC_IS_DCE3(rdev)) { args.v1.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 7ad43c6b1db..79e8ebc0530 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -115,6 +115,7 @@ static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector, u8 msg[20]; int msg_bytes = send_bytes + 4; u8 ack; + unsigned retry; if (send_bytes > 16) return -1; @@ -125,20 +126,22 @@ static 
int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector, msg[3] = (msg_bytes << 4) | (send_bytes - 1); memcpy(&msg[4], send, send_bytes); - while (1) { + for (retry = 0; retry < 4; retry++) { ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, msg, msg_bytes, NULL, 0, delay, &ack); - if (ret < 0) + if (ret == -EBUSY) + continue; + else if (ret < 0) return ret; if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK) - break; + return send_bytes; else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER) udelay(400); else return -EIO; } - return send_bytes; + return -EIO; } static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector, @@ -149,26 +152,31 @@ static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector, int msg_bytes = 4; u8 ack; int ret; + unsigned retry; msg[0] = address; msg[1] = address >> 8; msg[2] = AUX_NATIVE_READ << 4; msg[3] = (msg_bytes << 4) | (recv_bytes - 1); - while (1) { + for (retry = 0; retry < 4; retry++) { ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, msg, msg_bytes, recv, recv_bytes, delay, &ack); - if (ret == 0) - return -EPROTO; - if (ret < 0) + if (ret == -EBUSY) + continue; + else if (ret < 0) return ret; if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK) return ret; else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER) udelay(400); + else if (ret == 0) + return -EPROTO; else return -EIO; } + + return -EIO; } static void radeon_write_dpcd_reg(struct radeon_connector *radeon_connector, @@ -232,7 +240,9 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, for (retry = 0; retry < 4; retry++) { ret = radeon_process_aux_ch(auxch, msg, msg_bytes, reply, reply_bytes, 0, &ack); - if (ret < 0) { + if (ret == -EBUSY) + continue; + else if (ret < 0) { DRM_DEBUG_KMS("aux_ch failed %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index dc0a5b56c81..c4ffa14fb2f 100644 --- 
a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1404,7 +1404,8 @@ int evergreen_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); - WREG32(CP_RB_WPTR, 0); + rdev->cp.wptr = 0; + WREG32(CP_RB_WPTR, rdev->cp.wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, @@ -1426,7 +1427,6 @@ int evergreen_cp_resume(struct radeon_device *rdev) WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); rdev->cp.rptr = RREG32(CP_RB_RPTR); - rdev->cp.wptr = RREG32(CP_RB_WPTR); evergreen_cp_start(rdev); rdev->cp.ready = true; @@ -1590,48 +1590,6 @@ static u32 evergreen_get_tile_pipe_to_backend_map(struct radeon_device *rdev, return backend_map; } -static void evergreen_program_channel_remap(struct radeon_device *rdev) -{ - u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp; - - tmp = RREG32(MC_SHARED_CHMAP); - switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { - case 0: - case 1: - case 2: - case 3: - default: - /* default mapping */ - mc_shared_chremap = 0x00fac688; - break; - } - - switch (rdev->family) { - case CHIP_HEMLOCK: - case CHIP_CYPRESS: - case CHIP_BARTS: - tcp_chan_steer_lo = 0x54763210; - tcp_chan_steer_hi = 0x0000ba98; - break; - case CHIP_JUNIPER: - case CHIP_REDWOOD: - case CHIP_CEDAR: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_TURKS: - case CHIP_CAICOS: - default: - tcp_chan_steer_lo = 0x76543210; - tcp_chan_steer_hi = 0x0000ba98; - break; - } - - WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo); - WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi); - WREG32(MC_SHARED_CHREMAP, mc_shared_chremap); -} - static void evergreen_gpu_init(struct radeon_device *rdev) { u32 cc_rb_backend_disable = 0; @@ -2078,8 +2036,6 @@ static void evergreen_gpu_init(struct radeon_device *rdev) WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); - 
evergreen_program_channel_remap(rdev); - num_shader_engines = ((RREG32(GB_ADDR_CONFIG) & NUM_SHADER_ENGINES(3)) >> 12) + 1; grbm_gfx_index = INSTANCE_BROADCAST_WRITES; @@ -3171,21 +3127,23 @@ int evergreen_suspend(struct radeon_device *rdev) } int evergreen_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, struct radeon_fence *fence) + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence *fence) { int r; mutex_lock(&rdev->r600_blit.mutex); rdev->r600_blit.vb_ib = NULL; - r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); + r = evergreen_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE); if (r) { if (rdev->r600_blit.vb_ib) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); mutex_unlock(&rdev->r600_blit.mutex); return r; } - evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); + evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE); evergreen_blit_done_copy(rdev, fence); mutex_unlock(&rdev->r600_blit.mutex); return 0; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index cbf57d75d92..8c79ca97753 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -569,36 +569,6 @@ static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev, return backend_map; } -static void cayman_program_channel_remap(struct radeon_device *rdev) -{ - u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp; - - tmp = RREG32(MC_SHARED_CHMAP); - switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { - case 0: - case 1: - case 2: - case 3: - default: - /* default mapping */ - mc_shared_chremap = 0x00fac688; - break; - } - - switch (rdev->family) { - case CHIP_CAYMAN: - default: - //tcp_chan_steer_lo = 0x54763210 - tcp_chan_steer_lo = 0x76543210; - tcp_chan_steer_hi = 0x0000ba98; - break; - } - - WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo); - 
WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi); - WREG32(MC_SHARED_CHREMAP, mc_shared_chremap); -} - static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev, u32 disable_mask_per_se, u32 max_disable_mask_per_se, @@ -842,8 +812,6 @@ static void cayman_gpu_init(struct radeon_device *rdev) WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); - cayman_program_channel_remap(rdev); - /* primary versions */ WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); @@ -1187,7 +1155,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); - WREG32(CP_RB0_WPTR, 0); + rdev->cp.wptr = 0; + WREG32(CP_RB0_WPTR, rdev->cp.wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1207,7 +1176,6 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8); rdev->cp.rptr = RREG32(CP_RB0_RPTR); - rdev->cp.wptr = RREG32(CP_RB0_WPTR); /* ring1 - compute only */ /* Set ring buffer size */ @@ -1220,7 +1188,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); - WREG32(CP_RB1_WPTR, 0); + rdev->cp1.wptr = 0; + WREG32(CP_RB1_WPTR, rdev->cp1.wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1232,7 +1201,6 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8); rdev->cp1.rptr = RREG32(CP_RB1_RPTR); - rdev->cp1.wptr = RREG32(CP_RB1_WPTR); /* ring2 - compute only */ /* Set ring buffer size */ @@ -1245,7 +1213,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ 
WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); - WREG32(CP_RB2_WPTR, 0); + rdev->cp2.wptr = 0; + WREG32(CP_RB2_WPTR, rdev->cp2.wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1257,7 +1226,6 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8); rdev->cp2.rptr = RREG32(CP_RB2_RPTR); - rdev->cp2.wptr = RREG32(CP_RB2_WPTR); /* start the rings */ cayman_cp_start(rdev); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index f2204cb1ccd..7fcdbbbf297 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -721,11 +721,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev, int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence) { uint32_t cur_pages; - uint32_t stride_bytes = PAGE_SIZE; + uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE; uint32_t pitch; uint32_t stride_pixels; unsigned ndw; @@ -737,7 +737,7 @@ int r100_copy_blit(struct radeon_device *rdev, /* radeon pitch is /64 */ pitch = stride_bytes / 64; stride_pixels = stride_bytes / 4; - num_loops = DIV_ROUND_UP(num_pages, 8191); + num_loops = DIV_ROUND_UP(num_gpu_pages, 8191); /* Ask for enough room for blit + flush + fence */ ndw = 64 + (10 * num_loops); @@ -746,12 +746,12 @@ int r100_copy_blit(struct radeon_device *rdev, DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw); return -EINVAL; } - while (num_pages > 0) { - cur_pages = num_pages; + while (num_gpu_pages > 0) { + cur_pages = num_gpu_pages; if (cur_pages > 8191) { cur_pages = 8191; } - num_pages -= cur_pages; + num_gpu_pages -= cur_pages; /* pages are in Y direction - height page width in X direction - width */ @@ -773,8 +773,8 @@ int r100_copy_blit(struct radeon_device *rdev, radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); 
radeon_ring_write(rdev, 0); radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); - radeon_ring_write(rdev, num_pages); - radeon_ring_write(rdev, num_pages); + radeon_ring_write(rdev, num_gpu_pages); + radeon_ring_write(rdev, num_gpu_pages); radeon_ring_write(rdev, cur_pages | (stride_pixels << 16)); } radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); @@ -990,7 +990,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) /* Force read & write ptr to 0 */ WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE); WREG32(RADEON_CP_RB_RPTR_WR, 0); - WREG32(RADEON_CP_RB_WPTR, 0); + rdev->cp.wptr = 0; + WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); /* set the wb address whether it's enabled or not */ WREG32(R_00070C_CP_RB_RPTR_ADDR, @@ -1007,9 +1008,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_RB_CNTL, tmp); udelay(10); rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); - rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR); - /* protect against crazy HW on resume */ - rdev->cp.wptr &= rdev->cp.ptr_mask; /* Set cp mode to bus mastering & enable cp*/ WREG32(RADEON_CP_CSQ_MODE, REG_SET(RADEON_INDIRECT2_START, indirect2_start) | diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index f2405830041..a1f3ba063c2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -84,7 +84,7 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0) int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence) { uint32_t size; @@ -93,7 +93,7 @@ int r200_copy_dma(struct radeon_device *rdev, int r = 0; /* radeon pitch is /64 */ - size = num_pages << PAGE_SHIFT; + size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT; num_loops = DIV_ROUND_UP(size, 0x1FFFFF); r = radeon_ring_lock(rdev, num_loops * 4 + 64); if (r) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 
aa5571b73aa..720dd99163f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2209,7 +2209,8 @@ int r600_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); - WREG32(CP_RB_WPTR, 0); + rdev->cp.wptr = 0; + WREG32(CP_RB_WPTR, rdev->cp.wptr); /* set the wb address whether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, @@ -2231,7 +2232,6 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); rdev->cp.rptr = RREG32(CP_RB_RPTR); - rdev->cp.wptr = RREG32(CP_RB_WPTR); r600_cp_start(rdev); rdev->cp.ready = true; @@ -2353,21 +2353,23 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } int r600_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, struct radeon_fence *fence) + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence *fence) { int r; mutex_lock(&rdev->r600_blit.mutex); rdev->r600_blit.vb_ib = NULL; - r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); + r = r600_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE); if (r) { if (rdev->r600_blit.vb_ib) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); mutex_unlock(&rdev->r600_blit.mutex); return r; } - r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); + r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE); r600_blit_done_copy(rdev, fence); mutex_unlock(&rdev->r600_blit.mutex); return 0; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 32807baf55e..c1e056b35b2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -322,6 +322,7 @@ union radeon_gart_table { #define RADEON_GPU_PAGE_SIZE 4096 #define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1) +#define RADEON_GPU_PAGE_SHIFT 12 struct 
radeon_gart { dma_addr_t table_addr; @@ -914,17 +915,17 @@ struct radeon_asic { int (*copy_blit)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence); int (*copy_dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence); int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence); uint32_t (*get_engine_clock)(struct radeon_device *rdev); void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock); diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3d7a0d7c6a9..3dedaa07aac 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -75,7 +75,7 @@ uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg); int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, @@ -143,7 +143,7 @@ extern void r100_post_page_flip(struct radeon_device *rdev, int crtc); extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, + unsigned num_gpu_pages, struct radeon_fence *fence); void r200_set_safe_registers(struct radeon_device *rdev); @@ -311,7 +311,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence *fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct 
radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -403,7 +403,7 @@ void evergreen_bandwidth_update(struct radeon_device *rdev); void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence *fence); void evergreen_hpd_init(struct radeon_device *rdev); void evergreen_hpd_fini(struct radeon_device *rdev); bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index c4b8741dbf5..449c3d8c683 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -68,11 +68,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { int saved_dpms = connector->dpms; - if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && - radeon_dp_needs_link_train(radeon_connector)) - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - else + /* Only turn off the display it it's physically disconnected */ + if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + else if (radeon_dp_needs_link_train(radeon_connector)) + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); connector->dpms = saved_dpms; } } @@ -1303,23 +1303,14 @@ radeon_dp_detect(struct drm_connector *connector, bool force) /* get the DPCD from the bridge */ radeon_dp_getdpcd(radeon_connector); - if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) - ret = connector_status_connected; - else { - /* need to setup ddc on the bridge */ - if (encoder) - radeon_atom_ext_encoder_setup_ddc(encoder); + if (encoder) { + /* setup ddc on the bridge */ + 
radeon_atom_ext_encoder_setup_ddc(encoder); if (radeon_ddc_probe(radeon_connector, - radeon_connector->requires_extended_probe)) + radeon_connector->requires_extended_probe)) /* try DDC */ ret = connector_status_connected; - } - - if ((ret == connector_status_disconnected) && - radeon_connector->dac_load_detect) { - struct drm_encoder *encoder = radeon_best_single_encoder(connector); - struct drm_encoder_helper_funcs *encoder_funcs; - if (encoder) { - encoder_funcs = encoder->helper_private; + else if (radeon_connector->dac_load_detect) { /* try load detection */ + struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; ret = encoder_funcs->detect(encoder, connector); } } diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 3189a7efb2e..fde25c0d65a 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -208,23 +208,25 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, int xorigin = 0, yorigin = 0; int w = radeon_crtc->cursor_width; - if (x < 0) - xorigin = -x + 1; - if (y < 0) - yorigin = -y + 1; - if (xorigin >= CURSOR_WIDTH) - xorigin = CURSOR_WIDTH - 1; - if (yorigin >= CURSOR_HEIGHT) - yorigin = CURSOR_HEIGHT - 1; - if (ASIC_IS_AVIVO(rdev)) { - int i = 0; - struct drm_crtc *crtc_p; - /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); + } + DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); + + if (x < 0) { + xorigin = min(-x, CURSOR_WIDTH - 1); + x = 0; + } + if (y < 0) { + yorigin = min(-y, CURSOR_HEIGHT - 1); + y = 0; + } + + if (ASIC_IS_AVIVO(rdev)) { + int i = 0; + struct drm_crtc *crtc_p; /* avivo cursor image can't end on 128 pixel boundary or * go past the end of the frame if both crtcs are enabled @@ -253,16 +255,12 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, radeon_lock_cursor(crtc, true); if (ASIC_IS_DCE4(rdev)) { - 
WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, - ((xorigin ? 0 : x) << 16) | - (yorigin ? 0 : y)); + WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y); WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); } else if (ASIC_IS_AVIVO(rdev)) { - WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, - ((xorigin ? 0 : x) << 16) | - (yorigin ? 0 : y)); + WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y); WREG32(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); @@ -276,8 +274,8 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, | yorigin)); WREG32(RADEON_CUR_HORZ_VERT_POSN + radeon_crtc->crtc_offset, (RADEON_CUR_LOCK - | ((xorigin ? 0 : x) << 16) - | (yorigin ? 0 : y))); + | (x << 16) + | y)); /* offset is from DISP(2)_BASE_ADDRESS */ WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + (yorigin * 256))); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 6cc17fb96a5..6adb3e58aff 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -473,8 +473,8 @@ pflip_cleanup: spin_lock_irqsave(&dev->event_lock, flags); radeon_crtc->unpin_work = NULL; unlock_free: - drm_gem_object_unreference_unlocked(old_radeon_fb->obj); spin_unlock_irqrestore(&dev->event_lock, flags); + drm_gem_object_unreference_unlocked(old_radeon_fb->obj); radeon_fence_unref(&work->fence); kfree(work); diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 319d85d7e75..eb3f6dc6df8 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -1507,7 +1507,14 @@ 
radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) switch (mode) { case DRM_MODE_DPMS_ON: args.ucAction = ATOM_ENABLE; - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + /* workaround for DVOOutputControl on some RS690 systems */ + if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DDI) { + u32 reg = RREG32(RADEON_BIOS_3_SCRATCH); + WREG32(RADEON_BIOS_3_SCRATCH, reg & ~ATOM_S3_DFP2I_ACTIVE); + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + WREG32(RADEON_BIOS_3_SCRATCH, reg); + } else + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { args.ucAction = ATOM_LCD_BLON; atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); @@ -1631,7 +1638,17 @@ atombios_set_encoder_crtc_source(struct drm_encoder *encoder) break; case 2: args.v2.ucCRTC = radeon_crtc->crtc_id; - args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder); + if (radeon_encoder_is_dp_bridge(encoder)) { + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + + if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) + args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS; + else if (connector->connector_type == DRM_MODE_CONNECTOR_VGA) + args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT; + else + args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder); + } else + args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder); switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: @@ -1748,9 +1765,17 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder) /* DCE4/5 */ if (ASIC_IS_DCE4(rdev)) { dig = radeon_encoder->enc_priv; - if (ASIC_IS_DCE41(rdev)) - return radeon_crtc->crtc_id; - else { + if (ASIC_IS_DCE41(rdev)) { + /* ontario follows DCE4 */ + if (rdev->family == CHIP_PALM) { + if (dig->linkb) + return 1; + else + return 0; + } 
else + /* llano follows DCE3.2 */ + return radeon_crtc->crtc_id; + } else { switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: if (dig->linkb) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 9b86fb0e412..0b5468bfaf5 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -277,7 +277,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, DRM_ERROR("Trying to move memory with CP turned off.\n"); return -EINVAL; } - r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence); + + BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0); + + r = radeon_copy(rdev, old_start, new_start, + new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ + fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 4720d000d44..b13c2eedc32 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -536,55 +536,6 @@ static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev, return backend_map; } -static void rv770_program_channel_remap(struct radeon_device *rdev) -{ - u32 tcp_chan_steer, mc_shared_chremap, tmp; - bool force_no_swizzle; - - switch (rdev->family) { - case CHIP_RV770: - case CHIP_RV730: - force_no_swizzle = false; - break; - case CHIP_RV710: - case CHIP_RV740: - default: - force_no_swizzle = true; - break; - } - - tmp = RREG32(MC_SHARED_CHMAP); - switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { - case 0: - case 1: - default: - /* default mapping */ - mc_shared_chremap = 0x00fac688; - break; - case 2: - case 3: - if (force_no_swizzle) - mc_shared_chremap = 0x00fac688; - else - mc_shared_chremap = 0x00bbc298; - break; - } - - if (rdev->family == CHIP_RV740) - tcp_chan_steer = 0x00ef2a60; - else - tcp_chan_steer = 0x00fac688; - - 
/* RV770 CE has special chremap setup */ - if (rdev->pdev->device == 0x944e) { - tcp_chan_steer = 0x00b08b08; - mc_shared_chremap = 0x00b08b08; - } - - WREG32(TCP_CHAN_STEER, tcp_chan_steer); - WREG32(MC_SHARED_CHREMAP, mc_shared_chremap); -} - static void rv770_gpu_init(struct radeon_device *rdev) { int i, j, num_qd_pipes; @@ -785,8 +736,6 @@ static void rv770_gpu_init(struct radeon_device *rdev) WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); - rv770_program_channel_remap(rdev); - WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a4d38d85909..ef06194c5aa 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -394,7 +394,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) { if (bo->ttm == NULL) { - ret = ttm_bo_add_ttm(bo, false); + bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED); + ret = ttm_bo_add_ttm(bo, zero); if (ret) goto out_err; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ae3c6f5dd2b..082fcaea583 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -321,7 +321,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type]; struct ttm_tt *ttm = bo->ttm; struct ttm_mem_reg *old_mem = &bo->mem; - struct ttm_mem_reg old_copy; + struct ttm_mem_reg old_copy = *old_mem; void *old_iomap; void *new_iomap; int ret; diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c index a597039d075..72ca689b647 100644 --- a/drivers/hid/hid-wacom.c +++ b/drivers/hid/hid-wacom.c @@ -373,6 +373,8 @@ static int wacom_probe(struct hid_device *hdev, hidinput = 
list_entry(hdev->inputs.next, struct hid_input, list); input = hidinput->input; + __set_bit(INPUT_PROP_POINTER, input->propbit); + /* Basics */ input->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_REL); diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 59d83e83da7..93238378664 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -36,17 +36,25 @@ #include <linux/cpu.h> #include <linux/pci.h> #include <linux/smp.h> +#include <linux/moduleparam.h> #include <asm/msr.h> #include <asm/processor.h> #define DRVNAME "coretemp" +/* + * force_tjmax only matters when TjMax can't be read from the CPU itself. + * When set, it replaces the driver's suboptimal heuristic. + */ +static int force_tjmax; +module_param_named(tjmax, force_tjmax, int, 0444); +MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); + #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ #define NUM_REAL_CORES 16 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 17 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ -#define MAX_THRESH_ATTRS 3 /* Maximum no of Threshold attrs */ -#define TOTAL_ATTRS (MAX_CORE_ATTRS + MAX_THRESH_ATTRS) +#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO) #ifdef CONFIG_SMP @@ -69,8 +77,6 @@ * This value is passed as "id" field to rdmsr/wrmsr functions. * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS, * from where the temperature values should be read. - * @intrpt_reg: One of IA32_THERM_INTERRUPT or IA32_PACKAGE_THERM_INTERRUPT, - * from where the thresholds are read. * @attr_size: Total number of pre-core attrs displayed in the sysfs. * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data. * Otherwise, temp_data holds coretemp data. 
@@ -79,13 +85,11 @@ struct temp_data { int temp; int ttarget; - int tmin; int tjmax; unsigned long last_updated; unsigned int cpu; u32 cpu_core_id; u32 status_reg; - u32 intrpt_reg; int attr_size; bool is_pkg_data; bool valid; @@ -143,19 +147,6 @@ static ssize_t show_crit_alarm(struct device *dev, return sprintf(buf, "%d\n", (eax >> 5) & 1); } -static ssize_t show_max_alarm(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - u32 eax, edx; - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct platform_data *pdata = dev_get_drvdata(dev); - struct temp_data *tdata = pdata->core_data[attr->index]; - - rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx); - - return sprintf(buf, "%d\n", !!(eax & THERM_STATUS_THRESHOLD1)); -} - static ssize_t show_tjmax(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -174,83 +165,6 @@ static ssize_t show_ttarget(struct device *dev, return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget); } -static ssize_t store_ttarget(struct device *dev, - struct device_attribute *devattr, - const char *buf, size_t count) -{ - struct platform_data *pdata = dev_get_drvdata(dev); - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct temp_data *tdata = pdata->core_data[attr->index]; - u32 eax, edx; - unsigned long val; - int diff; - - if (strict_strtoul(buf, 10, &val)) - return -EINVAL; - - /* - * THERM_MASK_THRESHOLD1 is 7 bits wide. Values are entered in terms - * of milli degree celsius. 
Hence don't accept val > (127 * 1000) - */ - if (val > tdata->tjmax || val > 127000) - return -EINVAL; - - diff = (tdata->tjmax - val) / 1000; - - mutex_lock(&tdata->update_lock); - rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx); - eax = (eax & ~THERM_MASK_THRESHOLD1) | - (diff << THERM_SHIFT_THRESHOLD1); - wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx); - tdata->ttarget = val; - mutex_unlock(&tdata->update_lock); - - return count; -} - -static ssize_t show_tmin(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct platform_data *pdata = dev_get_drvdata(dev); - - return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tmin); -} - -static ssize_t store_tmin(struct device *dev, - struct device_attribute *devattr, - const char *buf, size_t count) -{ - struct platform_data *pdata = dev_get_drvdata(dev); - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct temp_data *tdata = pdata->core_data[attr->index]; - u32 eax, edx; - unsigned long val; - int diff; - - if (strict_strtoul(buf, 10, &val)) - return -EINVAL; - - /* - * THERM_MASK_THRESHOLD0 is 7 bits wide. Values are entered in terms - * of milli degree celsius. 
Hence don't accept val > (127 * 1000) - */ - if (val > tdata->tjmax || val > 127000) - return -EINVAL; - - diff = (tdata->tjmax - val) / 1000; - - mutex_lock(&tdata->update_lock); - rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx); - eax = (eax & ~THERM_MASK_THRESHOLD0) | - (diff << THERM_SHIFT_THRESHOLD0); - wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx); - tdata->tmin = val; - mutex_unlock(&tdata->update_lock); - - return count; -} - static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -374,7 +288,6 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) { - /* The 100C is default for both mobile and non mobile CPUs */ int err; u32 eax, edx; u32 val; @@ -385,7 +298,8 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ err = rdmsr_safe_on_cpu(id, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); if (err) { - dev_warn(dev, "Unable to read TjMax from CPU.\n"); + if (c->x86_model > 0xe && c->x86_model != 0x1c) + dev_warn(dev, "Unable to read TjMax from CPU %u\n", id); } else { val = (eax >> 16) & 0xff; /* @@ -393,11 +307,17 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) * will be used */ if (val) { - dev_info(dev, "TjMax is %d C.\n", val); + dev_dbg(dev, "TjMax is %d degrees C\n", val); return val * 1000; } } + if (force_tjmax) { + dev_notice(dev, "TjMax forced to %d degrees C by user\n", + force_tjmax); + return force_tjmax * 1000; + } + /* * An assumption is made for early CPUs and unreadable MSR. * NOTE: the calculated value may not be correct. 
@@ -414,21 +334,6 @@ static void __devinit get_ucode_rev_on_cpu(void *edx) rdmsr(MSR_IA32_UCODE_REV, eax, *(u32 *)edx); } -static int get_pkg_tjmax(unsigned int cpu, struct device *dev) -{ - int err; - u32 eax, edx, val; - - err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); - if (!err) { - val = (eax >> 16) & 0xff; - if (val) - return val * 1000; - } - dev_warn(dev, "Unable to read Pkg-TjMax from CPU:%u\n", cpu); - return 100000; /* Default TjMax: 100 degree celsius */ -} - static int create_name_attr(struct platform_data *pdata, struct device *dev) { sysfs_attr_init(&pdata->name_attr.attr); @@ -442,19 +347,14 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, int attr_no) { int err, i; - static ssize_t (*rd_ptr[TOTAL_ATTRS]) (struct device *dev, + static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, struct device_attribute *devattr, char *buf) = { show_label, show_crit_alarm, show_temp, show_tjmax, - show_max_alarm, show_ttarget, show_tmin }; - static ssize_t (*rw_ptr[TOTAL_ATTRS]) (struct device *dev, - struct device_attribute *devattr, const char *buf, - size_t count) = { NULL, NULL, NULL, NULL, NULL, - store_ttarget, store_tmin }; - static const char *names[TOTAL_ATTRS] = { + show_ttarget }; + static const char *const names[TOTAL_ATTRS] = { "temp%d_label", "temp%d_crit_alarm", "temp%d_input", "temp%d_crit", - "temp%d_max_alarm", "temp%d_max", - "temp%d_max_hyst" }; + "temp%d_max" }; for (i = 0; i < tdata->attr_size; i++) { snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i], @@ -462,10 +362,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; tdata->sd_attrs[i].dev_attr.attr.mode = S_IRUGO; - if (rw_ptr[i]) { - tdata->sd_attrs[i].dev_attr.attr.mode |= S_IWUSR; - tdata->sd_attrs[i].dev_attr.store = rw_ptr[i]; - } tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; 
tdata->sd_attrs[i].index = attr_no; err = device_create_file(dev, &tdata->sd_attrs[i].dev_attr); @@ -481,9 +377,9 @@ exit_free: } -static int __devinit chk_ucode_version(struct platform_device *pdev) +static int __cpuinit chk_ucode_version(unsigned int cpu) { - struct cpuinfo_x86 *c = &cpu_data(pdev->id); + struct cpuinfo_x86 *c = &cpu_data(cpu); int err; u32 edx; @@ -494,17 +390,15 @@ static int __devinit chk_ucode_version(struct platform_device *pdev) */ if (c->x86_model == 0xe && c->x86_mask < 0xc) { /* check for microcode update */ - err = smp_call_function_single(pdev->id, get_ucode_rev_on_cpu, + err = smp_call_function_single(cpu, get_ucode_rev_on_cpu, &edx, 1); if (err) { - dev_err(&pdev->dev, - "Cannot determine microcode revision of " - "CPU#%u (%d)!\n", pdev->id, err); + pr_err("Cannot determine microcode revision of " + "CPU#%u (%d)!\n", cpu, err); return -ENODEV; } else if (edx < 0x39) { - dev_err(&pdev->dev, - "Errata AE18 not fixed, update BIOS or " - "microcode of the CPU!\n"); + pr_err("Errata AE18 not fixed, update BIOS or " + "microcode of the CPU!\n"); return -ENODEV; } } @@ -538,8 +432,6 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS; - tdata->intrpt_reg = pkg_flag ? 
MSR_IA32_PACKAGE_THERM_INTERRUPT : - MSR_IA32_THERM_INTERRUPT; tdata->is_pkg_data = pkg_flag; tdata->cpu = cpu; tdata->cpu_core_id = TO_CORE_ID(cpu); @@ -548,11 +440,11 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) return tdata; } -static int create_core_data(struct platform_data *pdata, - struct platform_device *pdev, +static int create_core_data(struct platform_device *pdev, unsigned int cpu, int pkg_flag) { struct temp_data *tdata; + struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; int err, attr_no; @@ -588,20 +480,21 @@ static int create_core_data(struct platform_data *pdata, goto exit_free; /* We can access status register. Get Critical Temperature */ - if (pkg_flag) - tdata->tjmax = get_pkg_tjmax(pdev->id, &pdev->dev); - else - tdata->tjmax = get_tjmax(c, cpu, &pdev->dev); + tdata->tjmax = get_tjmax(c, cpu, &pdev->dev); /* - * Test if we can access the intrpt register. If so, increase the - * 'size' enough to have ttarget/tmin/max_alarm interfaces. - * Initialize ttarget with bits 16:22 of MSR_IA32_THERM_INTERRUPT + * Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET. + * The target temperature is available on older CPUs but not in this + * register. Atoms don't have the register at all. 
*/ - err = rdmsr_safe_on_cpu(cpu, tdata->intrpt_reg, &eax, &edx); - if (!err) { - tdata->attr_size += MAX_THRESH_ATTRS; - tdata->ttarget = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000; + if (c->x86_model > 0xe && c->x86_model != 0x1c) { + err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, + &eax, &edx); + if (!err) { + tdata->ttarget + = tdata->tjmax - ((eax >> 8) & 0xff) * 1000; + tdata->attr_size++; + } } pdata->core_data[attr_no] = tdata; @@ -613,22 +506,20 @@ static int create_core_data(struct platform_data *pdata, return 0; exit_free: + pdata->core_data[attr_no] = NULL; kfree(tdata); return err; } static void coretemp_add_core(unsigned int cpu, int pkg_flag) { - struct platform_data *pdata; struct platform_device *pdev = coretemp_get_pdev(cpu); int err; if (!pdev) return; - pdata = platform_get_drvdata(pdev); - - err = create_core_data(pdata, pdev, cpu, pkg_flag); + err = create_core_data(pdev, cpu, pkg_flag); if (err) dev_err(&pdev->dev, "Adding Core %u failed\n", cpu); } @@ -652,11 +543,6 @@ static int __devinit coretemp_probe(struct platform_device *pdev) struct platform_data *pdata; int err; - /* Check the microcode version of the CPU */ - err = chk_ucode_version(pdev); - if (err) - return err; - /* Initialize the per-package data structures */ pdata = kzalloc(sizeof(struct platform_data), GFP_KERNEL); if (!pdata) @@ -666,7 +552,7 @@ static int __devinit coretemp_probe(struct platform_device *pdev) if (err) goto exit_free; - pdata->phys_proc_id = TO_PHYS_ID(pdev->id); + pdata->phys_proc_id = pdev->id; platform_set_drvdata(pdev, pdata); pdata->hwmon_dev = hwmon_device_register(&pdev->dev); @@ -718,7 +604,7 @@ static int __cpuinit coretemp_device_add(unsigned int cpu) mutex_lock(&pdev_list_mutex); - pdev = platform_device_alloc(DRVNAME, cpu); + pdev = platform_device_alloc(DRVNAME, TO_PHYS_ID(cpu)); if (!pdev) { err = -ENOMEM; pr_err("Device allocation failed\n"); @@ -738,7 +624,7 @@ static int __cpuinit coretemp_device_add(unsigned int cpu) } 
pdev_entry->pdev = pdev; - pdev_entry->phys_proc_id = TO_PHYS_ID(cpu); + pdev_entry->phys_proc_id = pdev->id; list_add_tail(&pdev_entry->list, &pdev_list); mutex_unlock(&pdev_list_mutex); @@ -799,6 +685,10 @@ static void __cpuinit get_core_online(unsigned int cpu) return; if (!pdev) { + /* Check the microcode version of the CPU */ + if (chk_ucode_version(cpu)) + return; + /* * Alright, we have DTS support. * We are bringing the _first_ core in this pkg diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c index 257957c69d9..4f7c3fc40a8 100644 --- a/drivers/hwmon/ds620.c +++ b/drivers/hwmon/ds620.c @@ -72,7 +72,7 @@ struct ds620_data { char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - u16 temp[3]; /* Register values, word */ + s16 temp[3]; /* Register values, word */ }; /* diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index a561c3a0e91..397fc59b568 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -978,6 +978,8 @@ static void pmbus_find_max_attr(struct i2c_client *client, struct pmbus_limit_attr { u16 reg; /* Limit register */ bool update; /* True if register needs updates */ + bool low; /* True if low limit; for limits with compare + functions only */ const char *attr; /* Attribute name */ const char *alarm; /* Alarm attribute name */ u32 sbit; /* Alarm attribute status bit */ @@ -1029,7 +1031,8 @@ static bool pmbus_add_limit_attrs(struct i2c_client *client, if (attr->compare) { pmbus_add_boolean_cmp(data, name, l->alarm, index, - cbase, cindex, + l->low ? cindex : cbase, + l->low ? 
cbase : cindex, attr->sbase + page, l->sbit); } else { pmbus_add_boolean_reg(data, name, @@ -1366,11 +1369,13 @@ static const struct pmbus_sensor_attr power_attributes[] = { static const struct pmbus_limit_attr temp_limit_attrs[] = { { .reg = PMBUS_UT_WARN_LIMIT, + .low = true, .attr = "min", .alarm = "min_alarm", .sbit = PB_TEMP_UT_WARNING, }, { .reg = PMBUS_UT_FAULT_LIMIT, + .low = true, .attr = "lcrit", .alarm = "lcrit_alarm", .sbit = PB_TEMP_UT_FAULT, @@ -1399,11 +1404,13 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = { static const struct pmbus_limit_attr temp_limit_attrs23[] = { { .reg = PMBUS_UT_WARN_LIMIT, + .low = true, .attr = "min", .alarm = "min_alarm", .sbit = PB_TEMP_UT_WARNING, }, { .reg = PMBUS_UT_FAULT_LIMIT, + .low = true, .attr = "lcrit", .alarm = "lcrit_alarm", .sbit = PB_TEMP_UT_FAULT, diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index f2b377c56a3..36d7f270b14 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -390,7 +390,7 @@ temp_from_reg(u16 reg, s16 regval) { if (is_word_sized(reg)) return LM75_TEMP_FROM_REG(regval); - return regval * 1000; + return ((s8)regval) * 1000; } static inline u16 @@ -398,7 +398,8 @@ temp_to_reg(u16 reg, long temp) { if (is_word_sized(reg)) return LM75_TEMP_TO_REG(temp); - return DIV_ROUND_CLOSEST(SENSORS_LIMIT(temp, -127000, 128000), 1000); + return (s8)DIV_ROUND_CLOSEST(SENSORS_LIMIT(temp, -127000, 128000), + 1000); } /* Some of analog inputs have internal scaling (2x), 8mV is ADC LSB */ @@ -1715,7 +1716,8 @@ static void w83627ehf_device_remove_files(struct device *dev) } /* Get the monitoring functions started */ -static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data) +static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data, + enum kinds kind) { int i; u8 tmp, diode; @@ -1746,10 +1748,16 @@ static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data) w83627ehf_write_value(data, 
W83627EHF_REG_VBAT, tmp | 0x01); /* Get thermal sensor types */ - diode = w83627ehf_read_value(data, W83627EHF_REG_DIODE); + switch (kind) { + case w83627ehf: + diode = w83627ehf_read_value(data, W83627EHF_REG_DIODE); + break; + default: + diode = 0x70; + } for (i = 0; i < 3; i++) { if ((tmp & (0x02 << i))) - data->temp_type[i] = (diode & (0x10 << i)) ? 1 : 2; + data->temp_type[i] = (diode & (0x10 << i)) ? 1 : 3; else data->temp_type[i] = 4; /* thermistor */ } @@ -2016,7 +2024,7 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev) } /* Initialize the chip */ - w83627ehf_init_device(data); + w83627ehf_init_device(data, sio_data->kind); data->vrm = vid_which_vrm(); superio_enter(sio_data->sioreg); diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index 17cf1ab9552..8c2844e5691 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ -329,8 +329,8 @@ static int w83791d_detect(struct i2c_client *client, struct i2c_board_info *info); static int w83791d_remove(struct i2c_client *client); -static int w83791d_read(struct i2c_client *client, u8 register); -static int w83791d_write(struct i2c_client *client, u8 register, u8 value); +static int w83791d_read(struct i2c_client *client, u8 reg); +static int w83791d_write(struct i2c_client *client, u8 reg, u8 value); static struct w83791d_data *w83791d_update_device(struct device *dev); #ifdef DEBUG diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 9827c5e686c..811dbbd9306 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -327,7 +327,7 @@ config BLK_DEV_OPTI621 select BLK_DEV_IDEPCI help This is a driver for the OPTi 82C621 EIDE controller. - Please read the comments at the top of <file:drivers/ide/pci/opti621.c>. + Please read the comments at the top of <file:drivers/ide/opti621.c>. config BLK_DEV_RZ1000 tristate "RZ1000 chipset bugfix/support" @@ -365,7 +365,7 @@ config BLK_DEV_ALI15X3 normal dual channel support. 
Please read the comments at the top of - <file:drivers/ide/pci/alim15x3.c>. + <file:drivers/ide/alim15x3.c>. If unsure, say N. @@ -528,7 +528,7 @@ config BLK_DEV_NS87415 This driver adds detection and support for the NS87415 chip (used mainly on SPARC64 and PA-RISC machines). - Please read the comments at the top of <file:drivers/ide/pci/ns87415.c>. + Please read the comments at the top of <file:drivers/ide/ns87415.c>. config BLK_DEV_PDC202XX_OLD tristate "PROMISE PDC202{46|62|65|67} support" @@ -547,7 +547,7 @@ config BLK_DEV_PDC202XX_OLD for more than one card. Please read the comments at the top of - <file:drivers/ide/pci/pdc202xx_old.c>. + <file:drivers/ide/pdc202xx_old.c>. If unsure, say N. @@ -593,7 +593,7 @@ config BLK_DEV_SIS5513 ATA100: SiS635, SiS645, SiS650, SiS730, SiS735, SiS740, SiS745, SiS750 - Please read the comments at the top of <file:drivers/ide/pci/sis5513.c>. + Please read the comments at the top of <file:drivers/ide/sis5513.c>. config BLK_DEV_SL82C105 tristate "Winbond SL82c105 support" @@ -616,7 +616,7 @@ config BLK_DEV_SLC90E66 look-a-like to the PIIX4 it should be a nice addition. Please read the comments at the top of - <file:drivers/ide/pci/slc90e66.c>. + <file:drivers/ide/slc90e66.c>. config BLK_DEV_TRM290 tristate "Tekram TRM290 chipset support" @@ -625,7 +625,7 @@ config BLK_DEV_TRM290 This driver adds support for bus master DMA transfers using the Tekram TRM290 PCI IDE chip. Volunteers are needed for further tweaking and development. - Please read the comments at the top of <file:drivers/ide/pci/trm290.c>. + Please read the comments at the top of <file:drivers/ide/trm290.c>. config BLK_DEV_VIA82CXXX tristate "VIA82CXXX chipset support" @@ -836,7 +836,7 @@ config BLK_DEV_ALI14XX of the ALI M1439/1443/1445/1487/1489 chipsets, and permits faster I/O speeds to be set as well. See the files <file:Documentation/ide/ide.txt> and - <file:drivers/ide/legacy/ali14xx.c> for more info. + <file:drivers/ide/ali14xx.c> for more info. 
config BLK_DEV_DTC2278 tristate "DTC-2278 support" @@ -847,7 +847,7 @@ config BLK_DEV_DTC2278 boot parameter. It enables support for the secondary IDE interface of the DTC-2278 card, and permits faster I/O speeds to be set as well. See the <file:Documentation/ide/ide.txt> and - <file:drivers/ide/legacy/dtc2278.c> files for more info. + <file:drivers/ide/dtc2278.c> files for more info. config BLK_DEV_HT6560B tristate "Holtek HT6560B support" @@ -858,7 +858,7 @@ config BLK_DEV_HT6560B boot parameter. It enables support for the secondary IDE interface of the Holtek card, and permits faster I/O speeds to be set as well. See the <file:Documentation/ide/ide.txt> and - <file:drivers/ide/legacy/ht6560b.c> files for more info. + <file:drivers/ide/ht6560b.c> files for more info. config BLK_DEV_QD65XX tristate "QDI QD65xx support" @@ -867,7 +867,7 @@ config BLK_DEV_QD65XX help This driver is enabled at runtime using the "qd65xx.probe" kernel boot parameter. It permits faster I/O speeds to be set. See the - <file:Documentation/ide/ide.txt> and <file:drivers/ide/legacy/qd65xx.c> + <file:Documentation/ide/ide.txt> and <file:drivers/ide/qd65xx.c> for more info. config BLK_DEV_UMC8672 @@ -879,7 +879,7 @@ config BLK_DEV_UMC8672 boot parameter. It enables support for the secondary IDE interface of the UMC-8672, and permits faster I/O speeds to be set as well. See the files <file:Documentation/ide/ide.txt> and - <file:drivers/ide/legacy/umc8672.c> for more info. + <file:drivers/ide/umc8672.c> for more info. 
endif diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 274798068a5..16f69be820c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -435,7 +435,12 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq) if (!(rq->cmd_flags & REQ_FLUSH)) return BLKPREP_OK; - cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); + if (rq->special) { + cmd = rq->special; + memset(cmd, 0, sizeof(*cmd)); + } else { + cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); + } /* FIXME: map struct ide_taskfile on rq->cmd[] */ BUG_ON(cmd == NULL); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 17bf9d95463..6cd642aaa4d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -287,7 +287,7 @@ void __free_ep(struct kref *kref) if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); } kfree(ep); } @@ -1178,7 +1178,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); put_ep(&ep->com); return CPL_RET_BUF_DONE; } @@ -1377,7 +1377,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) if (!child_ep) { printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", __func__); - l2t_release(L2DATA(tdev), l2t); + l2t_release(tdev, l2t); dst_release(dst); goto reject; } @@ -1956,7 +1956,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!err) goto out; - l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t); + l2t_release(h->rdev.t3cdev_p, ep->l2t); fail4: dst_release(ep->dst); fail3: @@ -2127,7 +2127,7 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, 
struct dst_entry *new, PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, l2t); dst_hold(new); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); ep->l2t = l2t; dst_release(old); ep->dst = new; diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index 7b404e5443e..e34eeb8ae37 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -668,4 +668,3 @@ module_exit(adp5588_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); MODULE_DESCRIPTION("ADP5588/87 Keypad driver"); -MODULE_ALIAS("platform:adp5588-keys"); diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index b09c7d12721..ab860511f01 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -475,7 +475,7 @@ static void cm109_toggle_buzzer_sync(struct cm109_dev *dev, int on) le16_to_cpu(dev->ctl_req->wIndex), dev->ctl_data, USB_PKT_LEN, USB_CTRL_SET_TIMEOUT); - if (error && error != EINTR) + if (error < 0 && error != -EINTR) err("%s: usb_control_msg() failed %d", __func__, error); } diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index da280189ef0..5ec617e28f7 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -67,6 +67,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 +/* MacbookAir4,1 (unibody, July 2011) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI 0x0249 +#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO 0x024a +#define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS 0x024b /* MacbookAir4,2 (unibody, July 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d @@ -112,6 +116,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), 
BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), + /* MacbookAir4,1 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS), /* MacbookAir4,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), @@ -334,6 +342,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS, + HAS_INTEGRATED_BUTTON, + 0x84, sizeof(struct bt_data), + 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, + { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, + { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + }, {} }; diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index d27c9d91630..958b4eb6369 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -229,13 +229,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi get_unaligned_le16(&report[i + 3]); i += 4; } - } else if (usage == WCM_DIGITIZER) { - /* max pressure isn't reported - features->pressure_max = (unsigned short) - (report[i+4] << 8 | report[i + 3]); - */ - features->pressure_max = 255; - i += 4; } break; @@ -291,13 +284,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi pen = 1; i++; break; - - case HID_USAGE_UNDEFINED: - if (usage == WCM_DESKTOP && finger) /* capacity */ - features->pressure_max = - get_unaligned_le16(&report[i + 3]); - i += 4; - break; } break; diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 
c1c2f7b28d8..9dea71849f4 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -800,25 +800,26 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) int i; for (i = 0; i < 2; i++) { - int p = data[9 * i + 2]; - bool touch = p && !wacom->shared->stylus_in_proximity; + int offset = (data[1] & 0x80) ? (8 * i) : (9 * i); + bool touch = data[offset + 3] & 0x80; - input_mt_slot(input, i); - input_mt_report_slot_state(input, MT_TOOL_FINGER, touch); /* * Touch events need to be disabled while stylus is * in proximity because user's hand is resting on touchpad * and sending unwanted events. User expects tablet buttons * to continue working though. */ + touch = touch && !wacom->shared->stylus_in_proximity; + + input_mt_slot(input, i); + input_mt_report_slot_state(input, MT_TOOL_FINGER, touch); if (touch) { - int x = get_unaligned_be16(&data[9 * i + 3]) & 0x7ff; - int y = get_unaligned_be16(&data[9 * i + 5]) & 0x7ff; + int x = get_unaligned_be16(&data[offset + 3]) & 0x7ff; + int y = get_unaligned_be16(&data[offset + 5]) & 0x7ff; if (features->quirks & WACOM_QUIRK_BBTOUCH_LOWRES) { x <<= 5; y <<= 5; } - input_report_abs(input, ABS_MT_PRESSURE, p); input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); } @@ -1056,10 +1057,11 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, features->x_fuzz, 0); input_set_abs_params(input_dev, ABS_Y, 0, features->y_max, features->y_fuzz, 0); - input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max, - features->pressure_fuzz, 0); if (features->device_type == BTN_TOOL_PEN) { + input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max, + features->pressure_fuzz, 0); + /* penabled devices have fixed resolution for each model */ input_abs_set_res(input_dev, ABS_X, features->x_resolution); input_abs_set_res(input_dev, ABS_Y, features->y_resolution); @@ -1098,6 +1100,8 @@ void wacom_setup_input_capabilities(struct input_dev 
*input_dev, __set_bit(BTN_TOOL_MOUSE, input_dev->keybit); __set_bit(BTN_STYLUS, input_dev->keybit); __set_bit(BTN_STYLUS2, input_dev->keybit); + + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); break; case WACOM_21UX2: @@ -1120,12 +1124,12 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, for (i = 0; i < 8; i++) __set_bit(BTN_0 + i, input_dev->keybit); - if (wacom_wac->features.type != WACOM_21UX2) { - input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); - input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); - } - + input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); + input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); + + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); + wacom_setup_cintiq(wacom_wac); break; @@ -1150,6 +1154,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, /* fall through */ case INTUOS: + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + wacom_setup_intuos(wacom_wac); break; @@ -1165,6 +1171,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); wacom_setup_intuos(wacom_wac); + + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); break; case TABLETPC2FG: @@ -1183,26 +1191,40 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, case TABLETPC: __clear_bit(ABS_MISC, input_dev->absbit); + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); + if (features->device_type != BTN_TOOL_PEN) break; /* no need to process stylus stuff */ /* fall through */ case PL: - case PTU: case DTU: __set_bit(BTN_TOOL_PEN, input_dev->keybit); + __set_bit(BTN_TOOL_RUBBER, input_dev->keybit); __set_bit(BTN_STYLUS, input_dev->keybit); __set_bit(BTN_STYLUS2, input_dev->keybit); + + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); + break; + + case PTU: + __set_bit(BTN_STYLUS2, input_dev->keybit); /* fall through */ case PENPARTNER: + __set_bit(BTN_TOOL_PEN, 
input_dev->keybit); __set_bit(BTN_TOOL_RUBBER, input_dev->keybit); + __set_bit(BTN_STYLUS, input_dev->keybit); + + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); break; case BAMBOO_PT: __clear_bit(ABS_MISC, input_dev->absbit); + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + if (features->device_type == BTN_TOOL_DOUBLETAP) { __set_bit(BTN_LEFT, input_dev->keybit); __set_bit(BTN_FORWARD, input_dev->keybit); diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index c14412ef464..9941d39df43 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -383,6 +383,8 @@ static int w8001_setup(struct w8001 *w8001) dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); strlcat(w8001->name, "Wacom Serial", sizeof(w8001->name)); + __set_bit(INPUT_PROP_DIRECT, dev->propbit); + /* penabled? */ error = w8001_command(w8001, W8001_CMD_QUERY, true); if (!error) { diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3dc9befa5ae..6dcc7e2d54d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1388,7 +1388,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return ret; } - ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); + ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) printk(KERN_ERR "IOMMU: can't request irq\n"); return ret; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c621c98c99d..a88f3cbb100 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -306,6 +306,11 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & (1 << 7)); +} + static inline int first_pte_in_page(struct dma_pte *pte) { return !((unsigned long)pte & ~VTD_PAGE_MASK); @@ -404,6 +409,9 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; +int 
intel_iommu_gfx_mapped; +EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); + #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -577,17 +585,18 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) static void domain_update_iommu_superpage(struct dmar_domain *domain) { - int i, mask = 0xf; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + int mask = 0xf; if (!intel_iommu_superpage) { domain->iommu_superpage = 0; return; } - domain->iommu_superpage = 4; /* 1TiB */ - - for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { - mask |= cap_super_page_val(g_iommus[i]->cap); + /* set iommu_superpage to the smallest common denominator */ + for_each_active_iommu(iommu, drhd) { + mask &= cap_super_page_val(iommu->cap); if (!mask) { break; } @@ -730,29 +739,23 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int large_level) + unsigned long pfn, int target_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset, target_level; + int offset; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; - /* Search pte */ - if (!large_level) - target_level = 1; - else - target_level = large_level; - while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) break; if (level == target_level) break; @@ -816,13 +819,14 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, } /* clear last level pte, a tlb flush should be followed */ -static void dma_pte_clear_range(struct dmar_domain *domain, +static int dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, 
unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned int large_page = 1; struct dma_pte *first_pte, *pte; + int order; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -846,6 +850,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, (void *)pte - (void *)first_pte); } while (start_pfn && start_pfn <= last_pfn); + + order = (large_page - 1) * 9; + return order; } /* free page table pages. last level pte should already be cleared */ @@ -3226,9 +3233,6 @@ static void __init init_no_remapping_devices(void) } } - if (dmar_map_gfx) - return; - for_each_drhd_unit(drhd) { int i; if (drhd->ignored || drhd->include_all) @@ -3236,18 +3240,23 @@ static void __init init_no_remapping_devices(void) for (i = 0; i < drhd->devices_cnt; i++) if (drhd->devices[i] && - !IS_GFX_DEVICE(drhd->devices[i])) + !IS_GFX_DEVICE(drhd->devices[i])) break; if (i < drhd->devices_cnt) continue; - /* bypass IOMMU if it is just for gfx devices */ - drhd->ignored = 1; - for (i = 0; i < drhd->devices_cnt; i++) { - if (!drhd->devices[i]) - continue; - drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + /* This IOMMU has *only* gfx devices. 
Either bypass it or + set the gfx_mapped flag, as appropriate */ + if (dmar_map_gfx) { + intel_iommu_gfx_mapped = 1; + } else { + drhd->ignored = 1; + for (i = 0; i < drhd->devices_cnt; i++) { + if (!drhd->devices[i]) + continue; + drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } } } @@ -3568,6 +3577,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, found = 1; } + spin_unlock_irqrestore(&device_domain_lock, flags); + if (found == 0) { unsigned long tmp_flags; spin_lock_irqsave(&domain->iommu_lock, tmp_flags); @@ -3584,8 +3595,6 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_unlock_irqrestore(&iommu->lock, tmp_flags); } } - - spin_unlock_irqrestore(&device_domain_lock, flags); } static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) @@ -3739,6 +3748,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) vm_domain_exit(dmar_domain); return -ENOMEM; } + domain_update_iommu_cap(dmar_domain); domain->priv = dmar_domain; return 0; @@ -3864,14 +3874,15 @@ static int intel_iommu_unmap(struct iommu_domain *domain, { struct dmar_domain *dmar_domain = domain->priv; size_t size = PAGE_SIZE << gfp_order; + int order; - dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, + order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, (iova + size - 1) >> VTD_PAGE_SHIFT); if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - return gfp_order; + return order; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -3950,7 +3961,11 @@ static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev) if (!(ggc & GGC_MEMORY_VT_ENABLED)) { printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); dmar_map_gfx = 0; - } + } else if (dmar_map_gfx) { + /* we have to ensure the gfx device is idle before we flush */ + printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n"); + 
intel_iommu_strict = 1; + } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c index d87c9d02f78..328c64c0841 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/ledtrig-timer.c @@ -41,6 +41,7 @@ static ssize_t led_delay_on_store(struct device *dev, if (count == size) { led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off); + led_cdev->blink_delay_on = state; ret = count; } @@ -69,6 +70,7 @@ static ssize_t led_delay_off_store(struct device *dev, if (count == size) { led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state); + led_cdev->blink_delay_off = state; ret = count; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 49da55c1528..8c2a000cf3f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1698,6 +1698,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; + ti->discard_zeroes_data_unsupported = 1; + return 0; bad: diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 89f73ca22cf..f84c08029b2 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -81,8 +81,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags> */ if (!strcasecmp(arg_name, "corrupt_bio_byte")) { - if (!argc) + if (!argc) { ti->error = "Feature corrupt_bio_byte requires parameters"; + return -EINVAL; + } r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); if (r) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index f8214702963..32ac70861d6 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -628,6 +628,7 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, job->kc = kc; job->fn = fn; job->context = context; + job->master_job = job; 
atomic_inc(&kc->nr_jobs); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index a002dd85db1..86df8b2cf92 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -449,7 +449,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "write_mostly option is only valid for RAID1"; return -EINVAL; } - if (value > rs->md.raid_disks) { + if (value >= rs->md.raid_disks) { rs->ti->error = "Invalid write_mostly drive index given"; return -EINVAL; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 986b8754bb0..bc04518e9d8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1238,14 +1238,15 @@ static void dm_table_set_integrity(struct dm_table *t) return; template_disk = dm_table_get_integrity_disk(t, true); - if (!template_disk && - blk_integrity_is_initialized(dm_disk(t->md))) { + if (template_disk) + blk_integrity_register(dm_disk(t->md), + blk_get_integrity(template_disk)); + else if (blk_integrity_is_initialized(dm_disk(t->md))) DMWARN("%s: device no longer has a valid integrity profile", dm_device_name(t->md)); - return; - } - blk_integrity_register(dm_disk(t->md), - blk_get_integrity(template_disk)); + else + DMWARN("%s: unable to establish an integrity profile", + dm_device_name(t->md)); } static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, @@ -1282,6 +1283,22 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) return 0; } +static bool dm_table_discard_zeroes_data(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + /* Ensure that all targets supports discard_zeroes_data. 
*/ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (ti->discard_zeroes_data_unsupported) + return 0; + } + + return 1; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1304,6 +1321,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_flush(q, flush); + if (!dm_table_discard_zeroes_data(t)) + q->limits.discard_zeroes_data = 0; + dm_table_set_integrity(t); /* diff --git a/drivers/md/md.c b/drivers/md/md.c index 5404b229582..5c95ccb5950 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -61,6 +61,11 @@ static void autostart_arrays(int part); #endif +/* pers_list is a list of registered personalities protected + * by pers_lock. + * pers_lock does extra service to protect accesses to + * mddev->thread when the mutex cannot be held. + */ static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); @@ -739,7 +744,12 @@ static void mddev_unlock(mddev_t * mddev) } else mutex_unlock(&mddev->reconfig_mutex); + /* was we've dropped the mutex we need a spinlock to + * make sur the thread doesn't disappear + */ + spin_lock(&pers_lock); md_wakeup_thread(mddev->thread); + spin_unlock(&pers_lock); } static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) @@ -6429,11 +6439,18 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, return thread; } -void md_unregister_thread(mdk_thread_t *thread) +void md_unregister_thread(mdk_thread_t **threadp) { + mdk_thread_t *thread = *threadp; if (!thread) return; dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); + /* Locking ensures that mddev_unlock does not wake_up a + * non-existent thread + */ + spin_lock(&pers_lock); + *threadp = NULL; + spin_unlock(&pers_lock); kthread_stop(thread->tsk); kfree(thread); @@ -7340,8 +7357,7 @@ static void reap_sync_thread(mddev_t *mddev) mdk_rdev_t *rdev; /* resync has finished, collect result */ - 
md_unregister_thread(mddev->sync_thread); - mddev->sync_thread = NULL; + md_unregister_thread(&mddev->sync_thread); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ diff --git a/drivers/md/md.h b/drivers/md/md.h index 1e586bb4452..0a309dc29b4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -560,7 +560,7 @@ extern int register_md_personality(struct mdk_personality *p); extern int unregister_md_personality(struct mdk_personality *p); extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), mddev_t *mddev, const char *name); -extern void md_unregister_thread(mdk_thread_t *thread); +extern void md_unregister_thread(mdk_thread_t **threadp); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); extern void md_write_start(mddev_t *mddev, struct bio *bi); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 3535c23af28..d5b5fb30017 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -514,8 +514,7 @@ static int multipath_stop (mddev_t *mddev) { multipath_conf_t *conf = mddev->private; - md_unregister_thread(mddev->thread); - mddev->thread = NULL; + md_unregister_thread(&mddev->thread); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ mempool_destroy(conf->pool); kfree(conf->multipaths); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f4622dd8fc5..d9587dffe53 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2562,8 +2562,7 @@ static int stop(mddev_t *mddev) raise_barrier(conf); lower_barrier(conf); - md_unregister_thread(mddev->thread); - mddev->thread = NULL; + md_unregister_thread(&mddev->thread); if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d7a8468ddea..0cd9672cf9c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2955,7 +2955,7 @@ static int run(mddev_t 
*mddev) return 0; out_free_conf: - md_unregister_thread(mddev->thread); + md_unregister_thread(&mddev->thread); if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); @@ -2973,8 +2973,7 @@ static int stop(mddev_t *mddev) raise_barrier(conf, 0); lower_barrier(conf); - md_unregister_thread(mddev->thread); - mddev->thread = NULL; + md_unregister_thread(&mddev->thread); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 43709fa6b6d..ac5e8b57e50 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4941,8 +4941,7 @@ static int run(mddev_t *mddev) return 0; abort: - md_unregister_thread(mddev->thread); - mddev->thread = NULL; + md_unregister_thread(&mddev->thread); if (conf) { print_raid5_conf(conf); free_conf(conf); @@ -4956,8 +4955,7 @@ static int stop(mddev_t *mddev) { raid5_conf_t *conf = mddev->private; - md_unregister_thread(mddev->thread); - mddev->thread = NULL; + md_unregister_thread(&mddev->thread); if (mddev->queue) mddev->queue->backing_dev_info.congested_fn = NULL; free_conf(conf); diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c index b5ef3622244..b3a5ecdb33a 100644 --- a/drivers/media/video/omap/omap_vout.c +++ b/drivers/media/video/omap/omap_vout.c @@ -2194,19 +2194,6 @@ static int __init omap_vout_probe(struct platform_device *pdev) "'%s' Display already enabled\n", def_display->name); } - /* set the update mode */ - if (def_display->caps & - OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) { - if (dssdrv->enable_te) - dssdrv->enable_te(def_display, 0); - if (dssdrv->set_update_mode) - dssdrv->set_update_mode(def_display, - OMAP_DSS_UPDATE_MANUAL); - } else { - if (dssdrv->set_update_mode) - dssdrv->set_update_mode(def_display, - OMAP_DSS_UPDATE_AUTO); - } } } diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c 
index 9d3459de04b..80796eb0c53 100644 --- a/drivers/media/video/omap3isp/ispccdc.c +++ b/drivers/media/video/omap3isp/ispccdc.c @@ -31,6 +31,7 @@ #include <linux/dma-mapping.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/slab.h> #include <media/v4l2-event.h> #include "isp.h" diff --git a/drivers/media/video/uvc/uvc_driver.c b/drivers/media/video/uvc/uvc_driver.c index d29f9c2d085..e4100b1f68d 100644 --- a/drivers/media/video/uvc/uvc_driver.c +++ b/drivers/media/video/uvc/uvc_driver.c @@ -1961,7 +1961,7 @@ static int __uvc_resume(struct usb_interface *intf, int reset) list_for_each_entry(stream, &dev->streams, list) { if (stream->intf == intf) - return uvc_video_resume(stream); + return uvc_video_resume(stream, reset); } uvc_trace(UVC_TRACE_SUSPEND, "Resume: video streaming USB interface " diff --git a/drivers/media/video/uvc/uvc_entity.c b/drivers/media/video/uvc/uvc_entity.c index 48fea373c25..29e239911d0 100644 --- a/drivers/media/video/uvc/uvc_entity.c +++ b/drivers/media/video/uvc/uvc_entity.c @@ -49,7 +49,7 @@ static int uvc_mc_register_entity(struct uvc_video_chain *chain, if (remote == NULL) return -EINVAL; - source = (UVC_ENTITY_TYPE(remote) != UVC_TT_STREAMING) + source = (UVC_ENTITY_TYPE(remote) == UVC_TT_STREAMING) ? (remote->vdev ? &remote->vdev->entity : NULL) : &remote->subdev.entity; if (source == NULL) diff --git a/drivers/media/video/uvc/uvc_video.c b/drivers/media/video/uvc/uvc_video.c index 8244167c891..ffd1158628b 100644 --- a/drivers/media/video/uvc/uvc_video.c +++ b/drivers/media/video/uvc/uvc_video.c @@ -1104,10 +1104,18 @@ int uvc_video_suspend(struct uvc_streaming *stream) * buffers, making sure userspace applications are notified of the problem * instead of waiting forever. */ -int uvc_video_resume(struct uvc_streaming *stream) +int uvc_video_resume(struct uvc_streaming *stream, int reset) { int ret; + /* If the bus has been reset on resume, set the alternate setting to 0. 
+ * This should be the default value, but some devices crash or otherwise + * misbehave if they don't receive a SET_INTERFACE request before any + * other video control request. + */ + if (reset) + usb_set_interface(stream->dev->udev, stream->intfnum, 0); + stream->frozen = 0; ret = uvc_commit_video(stream, &stream->ctrl); diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h index df32a43ca86..cbdd49bf8b6 100644 --- a/drivers/media/video/uvc/uvcvideo.h +++ b/drivers/media/video/uvc/uvcvideo.h @@ -638,7 +638,7 @@ extern void uvc_mc_cleanup_entity(struct uvc_entity *entity); /* Video */ extern int uvc_video_init(struct uvc_streaming *stream); extern int uvc_video_suspend(struct uvc_streaming *stream); -extern int uvc_video_resume(struct uvc_streaming *stream); +extern int uvc_video_resume(struct uvc_streaming *stream, int reset); extern int uvc_video_enable(struct uvc_streaming *stream, int enable); extern int uvc_probe_video(struct uvc_streaming *stream, struct uvc_streaming_control *probe); diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 06f14008b34..a5c9ed128b9 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -173,6 +173,17 @@ static void v4l2_device_release(struct device *cd) media_device_unregister_entity(&vdev->entity); #endif + /* Do not call v4l2_device_put if there is no release callback set. + * Drivers that have no v4l2_device release callback might free the + * v4l2_dev instance in the video_device release callback below, so we + * must perform this check here. + * + * TODO: In the long run all drivers that use v4l2_device should use the + * v4l2_device release callback. This check will then be unnecessary. + */ + if (v4l2_dev && v4l2_dev->release == NULL) + v4l2_dev = NULL; + /* Release video_device and perform other cleanups as needed. 
*/ vdev->release(vdev); diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c index c72856c4143..e6a2c3b302d 100644 --- a/drivers/media/video/v4l2-device.c +++ b/drivers/media/video/v4l2-device.c @@ -38,6 +38,7 @@ int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev) mutex_init(&v4l2_dev->ioctl_lock); v4l2_prio_init(&v4l2_dev->prio); kref_init(&v4l2_dev->ref); + get_device(dev); v4l2_dev->dev = dev; if (dev == NULL) { /* If dev == NULL, then name must be filled in by the caller */ @@ -93,6 +94,7 @@ void v4l2_device_disconnect(struct v4l2_device *v4l2_dev) if (dev_get_drvdata(v4l2_dev->dev) == v4l2_dev) dev_set_drvdata(v4l2_dev->dev, NULL); + put_device(v4l2_dev->dev); v4l2_dev->dev = NULL; } EXPORT_SYMBOL_GPL(v4l2_device_disconnect); diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c index 21131c7b0f1..563654c9b19 100644 --- a/drivers/mfd/jz4740-adc.c +++ b/drivers/mfd/jz4740-adc.c @@ -273,7 +273,7 @@ static int __devinit jz4740_adc_probe(struct platform_device *pdev) ct->regs.ack = JZ_REG_ADC_STATUS; ct->chip.irq_mask = irq_gc_mask_set_bit; ct->chip.irq_unmask = irq_gc_mask_clr_bit; - ct->chip.irq_ack = irq_gc_ack; + ct->chip.irq_ack = irq_gc_ack_set_bit; irq_setup_generic_chip(gc, IRQ_MSK(5), 0, 0, IRQ_NOPROBE | IRQ_LEVEL); diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c index 5d1fca0277e..f83103b8970 100644 --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@ -135,10 +135,13 @@ static int max8997_i2c_probe(struct i2c_client *i2c, max8997->dev = &i2c->dev; max8997->i2c = i2c; max8997->type = id->driver_data; + max8997->irq = i2c->irq; if (!pdata) goto err; + max8997->irq_base = pdata->irq_base; + max8997->ono = pdata->ono; max8997->wakeup = pdata->wakeup; mutex_init(&max8997->iolock); @@ -152,6 +155,8 @@ static int max8997_i2c_probe(struct i2c_client *i2c, pm_runtime_set_active(max8997->dev); + max8997_irq_init(max8997); + mfd_add_devices(max8997->dev, -1, max8997_devs, 
ARRAY_SIZE(max8997_devs), NULL, 0); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 29601e7d606..86e14583a08 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/delay.h> @@ -676,7 +677,6 @@ static void usbhs_omap_tll_init(struct device *dev, u8 tll_channel_count) | OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF | OMAP_TLL_CHANNEL_CONF_ULPIDDRMODE); - reg |= (1 << (i + 1)); } else continue; diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c index 2bfad5c86cc..a56be931551 100644 --- a/drivers/mfd/tps65910-irq.c +++ b/drivers/mfd/tps65910-irq.c @@ -178,8 +178,10 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, switch (tps65910_chip_id(tps65910)) { case TPS65910: tps65910->irq_num = TPS65910_NUM_IRQ; + break; case TPS65911: tps65910->irq_num = TPS65911_NUM_IRQ; + break; } /* Register with genirq */ diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index b5d598c3aa7..7cbf2aa9e64 100644 --- a/drivers/mfd/twl4030-madc.c +++ b/drivers/mfd/twl4030-madc.c @@ -510,8 +510,9 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req) u8 ch_msb, ch_lsb; int ret; - if (!req) + if (!req || !twl4030_madc) return -EINVAL; + mutex_lock(&twl4030_madc->lock); if (req->method < TWL4030_MADC_RT || req->method > TWL4030_MADC_SW2) { ret = -EINVAL; @@ -706,6 +707,8 @@ static int __devinit twl4030_madc_probe(struct platform_device *pdev) if (!madc) return -ENOMEM; + madc->dev = &pdev->dev; + /* * Phoenix provides 2 interrupt lines. The first one is connected to * the OMAP. 
The other one can be connected to the other processor such diff --git a/drivers/mfd/wm8350-gpio.c b/drivers/mfd/wm8350-gpio.c index ebf99bef392..d584f6b4d6e 100644 --- a/drivers/mfd/wm8350-gpio.c +++ b/drivers/mfd/wm8350-gpio.c @@ -37,7 +37,7 @@ static int gpio_set_dir(struct wm8350 *wm8350, int gpio, int dir) return ret; } -static int gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db) +static int wm8350_gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db) { if (db == WM8350_GPIO_DEBOUNCE_ON) return wm8350_set_bits(wm8350, WM8350_GPIO_DEBOUNCE, @@ -210,7 +210,7 @@ int wm8350_gpio_config(struct wm8350 *wm8350, int gpio, int dir, int func, goto err; if (gpio_set_polarity(wm8350, gpio, pol)) goto err; - if (gpio_set_debounce(wm8350, gpio, debounce)) + if (wm8350_gpio_set_debounce(wm8350, gpio, debounce)) goto err; if (gpio_set_dir(wm8350, gpio, dir)) goto err; diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index b928bc14e97..8b51cd62d06 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -375,12 +375,14 @@ void lis3lv02d_poweron(struct lis3lv02d *lis3) * both have been read. So the value read will always be correct. * Set BOOT bit to refresh factory tuning values. 
*/ - lis3->read(lis3, CTRL_REG2, &reg); - if (lis3->whoami == WAI_12B) - reg |= CTRL2_BDU | CTRL2_BOOT; - else - reg |= CTRL2_BOOT_8B; - lis3->write(lis3, CTRL_REG2, reg); + if (lis3->pdata) { + lis3->read(lis3, CTRL_REG2, &reg); + if (lis3->whoami == WAI_12B) + reg |= CTRL2_BDU | CTRL2_BOOT; + else + reg |= CTRL2_BOOT_8B; + lis3->write(lis3, CTRL_REG2, reg); + } /* LIS3 power on delay is quite long */ msleep(lis3->pwron_delay / lis3lv02d_get_odr()); diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c index 06df1877ad0..0b56e3f4357 100644 --- a/drivers/misc/pti.c +++ b/drivers/misc/pti.c @@ -165,6 +165,11 @@ static void pti_write_to_aperture(struct pti_masterchannel *mc, static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc, const char *thread_name) { + /* + * Since we access the comm member in current's task_struct, we only + * need to be as large as what 'comm' in that structure is. + */ + char comm[TASK_COMM_LEN]; struct pti_masterchannel mccontrol = {.master = CONTROL_ID, .channel = 0}; const char *thread_name_p; @@ -172,13 +177,6 @@ static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc, u8 control_frame[CONTROL_FRAME_LEN]; if (!thread_name) { - /* - * Since we access the comm member in current's task_struct, - * we only need to be as large as what 'comm' in that - * structure is. - */ - char comm[TASK_COMM_LEN]; - if (!in_interrupt()) get_task_comm(comm, current); else diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1ff5486213f..4c1a648d00f 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -926,6 +926,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, /* * Reliable writes are used to implement Forced Unit Access and * REQ_META accesses, and are supported only on MMCs. + * + * XXX: this really needs a good explanation of why REQ_META + * is treated special. 
*/ bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || (req->cmd_flags & REQ_META)) && diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8d0314dbd94..a44874e24f2 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2535,7 +2535,7 @@ config S6GMAC source "drivers/net/stmmac/Kconfig" config PCH_GBE - tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE" + tristate "Intel EG20T PCH/OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE" depends on PCI select MII ---help--- @@ -2548,10 +2548,11 @@ config PCH_GBE This driver enables Gigabit Ethernet function. This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ - Output Hub), ML7223. - ML7223 IOH is for MP(Media Phone) use. - ML7223 is companion chip for Intel Atom E6xx series. - ML7223 is completely compatible for Intel EG20T PCH. + Output Hub), ML7223/ML7831. + ML7223 IOH is for MP(Media Phone) use. ML7831 IOH is for general + purpose use. + ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7223/ML7831 is completely compatible for Intel EG20T PCH. 
config FTGMAC100 tristate "Faraday FTGMAC100 Gigabit Ethernet support" diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index c423504a755..9a7eb3b36cf 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -239,13 +239,19 @@ void bnx2x_int_disable(struct bnx2x *bp); * FUNC_N_CLID_X = N * NUM_SPECIAL_CLIENTS + FUNC_0_CLID_X * */ -/* iSCSI L2 */ -#define BNX2X_ISCSI_ETH_CL_ID_IDX 1 -#define BNX2X_ISCSI_ETH_CID 49 +enum { + BNX2X_ISCSI_ETH_CL_ID_IDX, + BNX2X_FCOE_ETH_CL_ID_IDX, + BNX2X_MAX_CNIC_ETH_CL_ID_IDX, +}; -/* FCoE L2 */ -#define BNX2X_FCOE_ETH_CL_ID_IDX 2 -#define BNX2X_FCOE_ETH_CID 50 +#define BNX2X_CNIC_START_ETH_CID 48 +enum { + /* iSCSI L2 */ + BNX2X_ISCSI_ETH_CID = BNX2X_CNIC_START_ETH_CID, + /* FCoE L2 */ + BNX2X_FCOE_ETH_CID, +}; /** Additional rings budgeting */ #ifdef BCM_CNIC @@ -315,6 +321,14 @@ union db_prod { u32 raw; }; +/* dropless fc FW/HW related params */ +#define BRB_SIZE(bp) (CHIP_IS_E3(bp) ? 1024 : 512) +#define MAX_AGG_QS(bp) (CHIP_IS_E1(bp) ? \ + ETH_MAX_AGGREGATION_QUEUES_E1 :\ + ETH_MAX_AGGREGATION_QUEUES_E1H_E2) +#define FW_DROP_LEVEL(bp) (3 + MAX_SPQ_PENDING + MAX_AGG_QS(bp)) +#define FW_PREFETCH_CNT 16 +#define DROPLESS_FC_HEADROOM 100 /* MC hsi */ #define BCM_PAGE_SHIFT 12 @@ -331,15 +345,35 @@ union db_prod { /* SGE ring related macros */ #define NUM_RX_SGE_PAGES 2 #define RX_SGE_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge)) -#define MAX_RX_SGE_CNT (RX_SGE_CNT - 2) +#define NEXT_PAGE_SGE_DESC_CNT 2 +#define MAX_RX_SGE_CNT (RX_SGE_CNT - NEXT_PAGE_SGE_DESC_CNT) /* RX_SGE_CNT is promised to be a power of 2 */ #define RX_SGE_MASK (RX_SGE_CNT - 1) #define NUM_RX_SGE (RX_SGE_CNT * NUM_RX_SGE_PAGES) #define MAX_RX_SGE (NUM_RX_SGE - 1) #define NEXT_SGE_IDX(x) ((((x) & RX_SGE_MASK) == \ - (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1) + (MAX_RX_SGE_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_SGE_DESC_CNT : \ + (x) + 1) #define RX_SGE(x) ((x) & MAX_RX_SGE) +/* + * Number of required SGEs is the sum of two: + * 1. 
Number of possible opened aggregations (next packet for + * these aggregations will probably consume SGE immidiatelly) + * 2. Rest of BRB blocks divided by 2 (block will consume new SGE only + * after placement on BD for new TPA aggregation) + * + * Takes into account NEXT_PAGE_SGE_DESC_CNT "next" elements on each page + */ +#define NUM_SGE_REQ (MAX_AGG_QS(bp) + \ + (BRB_SIZE(bp) - MAX_AGG_QS(bp)) / 2) +#define NUM_SGE_PG_REQ ((NUM_SGE_REQ + MAX_RX_SGE_CNT - 1) / \ + MAX_RX_SGE_CNT) +#define SGE_TH_LO(bp) (NUM_SGE_REQ + \ + NUM_SGE_PG_REQ * NEXT_PAGE_SGE_DESC_CNT) +#define SGE_TH_HI(bp) (SGE_TH_LO(bp) + DROPLESS_FC_HEADROOM) + /* Manipulate a bit vector defined as an array of u64 */ /* Number of bits in one sge_mask array element */ @@ -551,24 +585,43 @@ struct bnx2x_fastpath { #define NUM_TX_RINGS 16 #define TX_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types)) -#define MAX_TX_DESC_CNT (TX_DESC_CNT - 1) +#define NEXT_PAGE_TX_DESC_CNT 1 +#define MAX_TX_DESC_CNT (TX_DESC_CNT - NEXT_PAGE_TX_DESC_CNT) #define NUM_TX_BD (TX_DESC_CNT * NUM_TX_RINGS) #define MAX_TX_BD (NUM_TX_BD - 1) #define MAX_TX_AVAIL (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2) #define NEXT_TX_IDX(x) ((((x) & MAX_TX_DESC_CNT) == \ - (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1) + (MAX_TX_DESC_CNT - 1)) ? 
\ + (x) + 1 + NEXT_PAGE_TX_DESC_CNT : \ + (x) + 1) #define TX_BD(x) ((x) & MAX_TX_BD) #define TX_BD_POFF(x) ((x) & MAX_TX_DESC_CNT) /* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */ #define NUM_RX_RINGS 8 #define RX_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd)) -#define MAX_RX_DESC_CNT (RX_DESC_CNT - 2) +#define NEXT_PAGE_RX_DESC_CNT 2 +#define MAX_RX_DESC_CNT (RX_DESC_CNT - NEXT_PAGE_RX_DESC_CNT) #define RX_DESC_MASK (RX_DESC_CNT - 1) #define NUM_RX_BD (RX_DESC_CNT * NUM_RX_RINGS) #define MAX_RX_BD (NUM_RX_BD - 1) #define MAX_RX_AVAIL (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2) -#define MIN_RX_AVAIL 128 + +/* dropless fc calculations for BDs + * + * Number of BDs should as number of buffers in BRB: + * Low threshold takes into account NEXT_PAGE_RX_DESC_CNT + * "next" elements on each page + */ +#define NUM_BD_REQ BRB_SIZE(bp) +#define NUM_BD_PG_REQ ((NUM_BD_REQ + MAX_RX_DESC_CNT - 1) / \ + MAX_RX_DESC_CNT) +#define BD_TH_LO(bp) (NUM_BD_REQ + \ + NUM_BD_PG_REQ * NEXT_PAGE_RX_DESC_CNT + \ + FW_DROP_LEVEL(bp)) +#define BD_TH_HI(bp) (BD_TH_LO(bp) + DROPLESS_FC_HEADROOM) + +#define MIN_RX_AVAIL ((bp)->dropless_fc ? BD_TH_HI(bp) + 128 : 128) #define MIN_RX_SIZE_TPA_HW (CHIP_IS_E1(bp) ? \ ETH_MIN_RX_CQES_WITH_TPA_E1 : \ @@ -579,7 +632,9 @@ struct bnx2x_fastpath { MIN_RX_AVAIL)) #define NEXT_RX_IDX(x) ((((x) & RX_DESC_MASK) == \ - (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1) + (MAX_RX_DESC_CNT - 1)) ? 
\ + (x) + 1 + NEXT_PAGE_RX_DESC_CNT : \ + (x) + 1) #define RX_BD(x) ((x) & MAX_RX_BD) /* @@ -589,14 +644,31 @@ struct bnx2x_fastpath { #define CQE_BD_REL (sizeof(union eth_rx_cqe) / sizeof(struct eth_rx_bd)) #define NUM_RCQ_RINGS (NUM_RX_RINGS * CQE_BD_REL) #define RCQ_DESC_CNT (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe)) -#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - 1) +#define NEXT_PAGE_RCQ_DESC_CNT 1 +#define MAX_RCQ_DESC_CNT (RCQ_DESC_CNT - NEXT_PAGE_RCQ_DESC_CNT) #define NUM_RCQ_BD (RCQ_DESC_CNT * NUM_RCQ_RINGS) #define MAX_RCQ_BD (NUM_RCQ_BD - 1) #define MAX_RCQ_AVAIL (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2) #define NEXT_RCQ_IDX(x) ((((x) & MAX_RCQ_DESC_CNT) == \ - (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1) + (MAX_RCQ_DESC_CNT - 1)) ? \ + (x) + 1 + NEXT_PAGE_RCQ_DESC_CNT : \ + (x) + 1) #define RCQ_BD(x) ((x) & MAX_RCQ_BD) +/* dropless fc calculations for RCQs + * + * Number of RCQs should be as number of buffers in BRB: + * Low threshold takes into account NEXT_PAGE_RCQ_DESC_CNT + * "next" elements on each page + */ +#define NUM_RCQ_REQ BRB_SIZE(bp) +#define NUM_RCQ_PG_REQ ((NUM_BD_REQ + MAX_RCQ_DESC_CNT - 1) / \ + MAX_RCQ_DESC_CNT) +#define RCQ_TH_LO(bp) (NUM_RCQ_REQ + \ + NUM_RCQ_PG_REQ * NEXT_PAGE_RCQ_DESC_CNT + \ + FW_DROP_LEVEL(bp)) +#define RCQ_TH_HI(bp) (RCQ_TH_LO(bp) + DROPLESS_FC_HEADROOM) + /* This is needed for determining of last_max */ #define SUB_S16(a, b) (s16)((s16)(a) - (s16)(b)) @@ -685,24 +757,17 @@ struct bnx2x_fastpath { #define FP_CSB_FUNC_OFF \ offsetof(struct cstorm_status_block_c, func) -#define HC_INDEX_TOE_RX_CQ_CONS 0 /* Formerly Ustorm TOE CQ index */ - /* (HC_INDEX_U_TOE_RX_CQ_CONS) */ -#define HC_INDEX_ETH_RX_CQ_CONS 1 /* Formerly Ustorm ETH CQ index */ - /* (HC_INDEX_U_ETH_RX_CQ_CONS) */ -#define HC_INDEX_ETH_RX_BD_CONS 2 /* Formerly Ustorm ETH BD index */ - /* (HC_INDEX_U_ETH_RX_BD_CONS) */ - -#define HC_INDEX_TOE_TX_CQ_CONS 4 /* Formerly Cstorm TOE CQ index */ - /* (HC_INDEX_C_TOE_TX_CQ_CONS) */ -#define 
HC_INDEX_ETH_TX_CQ_CONS_COS0 5 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ -#define HC_INDEX_ETH_TX_CQ_CONS_COS1 6 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ -#define HC_INDEX_ETH_TX_CQ_CONS_COS2 7 /* Formerly Cstorm ETH CQ index */ - /* (HC_INDEX_C_ETH_TX_CQ_CONS) */ +#define HC_INDEX_ETH_RX_CQ_CONS 1 -#define HC_INDEX_ETH_FIRST_TX_CQ_CONS HC_INDEX_ETH_TX_CQ_CONS_COS0 +#define HC_INDEX_OOO_TX_CQ_CONS 4 + +#define HC_INDEX_ETH_TX_CQ_CONS_COS0 5 + +#define HC_INDEX_ETH_TX_CQ_CONS_COS1 6 +#define HC_INDEX_ETH_TX_CQ_CONS_COS2 7 + +#define HC_INDEX_ETH_FIRST_TX_CQ_CONS HC_INDEX_ETH_TX_CQ_CONS_COS0 #define BNX2X_RX_SB_INDEX \ (&fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]) @@ -1100,11 +1165,12 @@ struct bnx2x { #define BP_PORT(bp) (bp->pfid & 1) #define BP_FUNC(bp) (bp->pfid) #define BP_ABS_FUNC(bp) (bp->pf_num) -#define BP_E1HVN(bp) (bp->pfid >> 1) -#define BP_VN(bp) (BP_E1HVN(bp)) /*remove when approved*/ -#define BP_L_ID(bp) (BP_E1HVN(bp) << 2) -#define BP_FW_MB_IDX(bp) (BP_PORT(bp) +\ - BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2 : 1)) +#define BP_VN(bp) ((bp)->pfid >> 1) +#define BP_MAX_VN_NUM(bp) (CHIP_MODE_IS_4_PORT(bp) ? 2 : 4) +#define BP_L_ID(bp) (BP_VN(bp) << 2) +#define BP_FW_MB_IDX_VN(bp, vn) (BP_PORT(bp) +\ + (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 
2 : 1)) +#define BP_FW_MB_IDX(bp) BP_FW_MB_IDX_VN(bp, BP_VN(bp)) struct net_device *dev; struct pci_dev *pdev; @@ -1767,7 +1833,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, #define MAX_DMAE_C_PER_PORT 8 #define INIT_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ - BP_E1HVN(bp)) + BP_VN(bp)) #define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ E1HVN_MAX) @@ -1793,7 +1859,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, /* must be used on a CID before placing it on a HW ring */ #define HW_CID(bp, x) ((BP_PORT(bp) << 23) | \ - (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \ + (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \ (x)) #define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe)) diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index 37e5790681a..c4cbf973641 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -987,8 +987,6 @@ void __bnx2x_link_report(struct bnx2x *bp) void bnx2x_init_rx_rings(struct bnx2x *bp) { int func = BP_FUNC(bp); - int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2; u16 ring_prod; int i, j; @@ -1001,7 +999,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) if (!fp->disable_tpa) { /* Fill the per-aggregtion pool */ - for (i = 0; i < max_agg_queues; i++) { + for (i = 0; i < MAX_AGG_QS(bp); i++) { struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i]; struct sw_rx_bd *first_buf = @@ -1041,7 +1039,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) bnx2x_free_rx_sge_range(bp, fp, ring_prod); bnx2x_free_tpa_pool(bp, fp, - max_agg_queues); + MAX_AGG_QS(bp)); fp->disable_tpa = 1; ring_prod = 0; break; @@ -1137,9 +1135,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp) bnx2x_free_rx_bds(fp); if (!fp->disable_tpa) - bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ? 
- ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2); + bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); } } @@ -3095,15 +3091,20 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) struct bnx2x_fastpath *fp = &bp->fp[index]; int ring_size = 0; u8 cos; + int rx_ring_size = 0; /* if rx_ring_size specified - use it */ - int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size : - MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp); + if (!bp->rx_ring_size) { - /* allocate at least number of buffers required by FW */ - rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA : - MIN_RX_SIZE_TPA, - rx_ring_size); + rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp); + + /* allocate at least number of buffers required by FW */ + rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA : + MIN_RX_SIZE_TPA, rx_ring_size); + + bp->rx_ring_size = rx_ring_size; + } else + rx_ring_size = bp->rx_ring_size; /* Common */ sb = &bnx2x_fp(bp, index, status_blk); diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h index 223bfeebc59..2dc1199239d 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.h +++ b/drivers/net/bnx2x/bnx2x_cmn.h @@ -1297,7 +1297,7 @@ static inline void bnx2x_init_txdata(struct bnx2x *bp, static inline u8 bnx2x_cnic_eth_cl_id(struct bnx2x *bp, u8 cl_idx) { return bp->cnic_base_cl_id + cl_idx + - (bp->pf_num >> 1) * NON_ETH_CONTEXT_USE; + (bp->pf_num >> 1) * BNX2X_MAX_CNIC_ETH_CL_ID_IDX; } static inline u8 bnx2x_cnic_fw_sb_id(struct bnx2x *bp) diff --git a/drivers/net/bnx2x/bnx2x_dcb.c b/drivers/net/bnx2x/bnx2x_dcb.c index a1e004a82f7..0b4acf67e0c 100644 --- a/drivers/net/bnx2x/bnx2x_dcb.c +++ b/drivers/net/bnx2x/bnx2x_dcb.c @@ -2120,6 +2120,7 @@ static u8 bnx2x_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) break; case DCB_CAP_ATTR_DCBX: *cap = BNX2X_DCBX_CAPS; + break; default: rval = -EINVAL; break; diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index 221863059da..cf3e47914dd 
100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -363,13 +363,50 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) } /* advertise the requested speed and duplex if supported */ - cmd->advertising &= bp->port.supported[cfg_idx]; + if (cmd->advertising & ~(bp->port.supported[cfg_idx])) { + DP(NETIF_MSG_LINK, "Advertisement parameters " + "are not supported\n"); + return -EINVAL; + } bp->link_params.req_line_speed[cfg_idx] = SPEED_AUTO_NEG; - bp->link_params.req_duplex[cfg_idx] = DUPLEX_FULL; - bp->port.advertising[cfg_idx] |= (ADVERTISED_Autoneg | + bp->link_params.req_duplex[cfg_idx] = cmd->duplex; + bp->port.advertising[cfg_idx] = (ADVERTISED_Autoneg | cmd->advertising); + if (cmd->advertising) { + + bp->link_params.speed_cap_mask[cfg_idx] = 0; + if (cmd->advertising & ADVERTISED_10baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF; + } + if (cmd->advertising & ADVERTISED_10baseT_Full) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL; + if (cmd->advertising & ADVERTISED_100baseT_Full) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL; + + if (cmd->advertising & ADVERTISED_100baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF; + } + if (cmd->advertising & ADVERTISED_1000baseT_Half) { + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_1G; + } + if (cmd->advertising & (ADVERTISED_1000baseT_Full | + ADVERTISED_1000baseKX_Full)) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_1G; + + if (cmd->advertising & (ADVERTISED_10000baseT_Full | + ADVERTISED_10000baseKX4_Full | + ADVERTISED_10000baseKR_Full)) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_10G; + } } else { /* forced speed */ /* advertise the requested speed and duplex if 
supported */ switch (speed) { @@ -1310,10 +1347,7 @@ static void bnx2x_get_ringparam(struct net_device *dev, if (bp->rx_ring_size) ering->rx_pending = bp->rx_ring_size; else - if (bp->state == BNX2X_STATE_OPEN && bp->num_queues) - ering->rx_pending = MAX_RX_AVAIL/bp->num_queues; - else - ering->rx_pending = MAX_RX_AVAIL; + ering->rx_pending = MAX_RX_AVAIL; ering->rx_mini_pending = 0; ering->rx_jumbo_pending = 0; diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index d45b1555a60..ba15bdc5a1a 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -778,9 +778,9 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp, { u32 nig_reg_adress_crd_weight = 0; u32 pbf_reg_adress_crd_weight = 0; - /* Calculate and set BW for this COS*/ - const u32 cos_bw_nig = (bw * min_w_val_nig) / total_bw; - const u32 cos_bw_pbf = (bw * min_w_val_pbf) / total_bw; + /* Calculate and set BW for this COS - use 1 instead of 0 for BW */ + const u32 cos_bw_nig = ((bw ? bw : 1) * min_w_val_nig) / total_bw; + const u32 cos_bw_pbf = ((bw ? 
bw : 1) * min_w_val_pbf) / total_bw; switch (cos_entry) { case 0: @@ -852,18 +852,12 @@ static int bnx2x_ets_e3b0_get_total_bw( /* Calculate total BW requested */ for (cos_idx = 0; cos_idx < ets_params->num_of_cos; cos_idx++) { if (bnx2x_cos_state_bw == ets_params->cos[cos_idx].state) { - - if (0 == ets_params->cos[cos_idx].params.bw_params.bw) { - DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config BW" - "was set to 0\n"); - return -EINVAL; + *total_bw += + ets_params->cos[cos_idx].params.bw_params.bw; } - *total_bw += - ets_params->cos[cos_idx].params.bw_params.bw; - } } - /*Check taotl BW is valid */ + /* Check total BW is valid */ if ((100 != *total_bw) || (0 == *total_bw)) { if (0 == *total_bw) { DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config toatl BW" @@ -1726,7 +1720,7 @@ static int bnx2x_xmac_enable(struct link_params *params, /* Check loopback mode */ if (lb) - val |= XMAC_CTRL_REG_CORE_LOCAL_LPBK; + val |= XMAC_CTRL_REG_LINE_LOCAL_LPBK; REG_WR(bp, xmac_base + XMAC_REG_CTRL, val); bnx2x_set_xumac_nig(params, ((vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) != 0), 1); @@ -3630,6 +3624,12 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, val16); + /* Advertised and set FEC (Forward Error Correction) */ + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2, + (MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY | + MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ)); + /* Enable CL37 BAM */ if (REG_RD(bp, params->shmem_base + offsetof(struct shmem_region, dev_info. @@ -5924,7 +5924,7 @@ int bnx2x_set_led(struct link_params *params, (tmp | EMAC_LED_OVERRIDE)); /* * return here without enabling traffic - * LED blink andsetting rate in ON mode. + * LED blink and setting rate in ON mode. * In oper mode, enabling LED blink * and setting rate is needed. 
*/ @@ -5936,7 +5936,11 @@ int bnx2x_set_led(struct link_params *params, * This is a work-around for HW issue found when link * is up in CL73 */ - REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); + if ((!CHIP_IS_E3(bp)) || + (CHIP_IS_E3(bp) && + mode == LED_MODE_ON)) + REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); + if (CHIP_IS_E1x(bp) || CHIP_IS_E2(bp) || (mode == LED_MODE_ON)) @@ -10638,8 +10642,7 @@ static struct bnx2x_phy phy_warpcore = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_HW_LOCK_REQUIRED | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_HW_LOCK_REQUIRED, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10765,8 +10768,7 @@ static struct bnx2x_phy phy_8706 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_INIT_XGXS_FIRST | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_INIT_XGXS_FIRST, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10797,8 +10799,7 @@ static struct bnx2x_phy phy_8726 = { .addr = 0xff, .def_md_devad = 0, .flags = (FLAGS_HW_LOCK_REQUIRED | - FLAGS_INIT_XGXS_FIRST | - FLAGS_TX_ERROR_CHECK), + FLAGS_INIT_XGXS_FIRST), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10829,8 +10830,7 @@ static struct bnx2x_phy phy_8727 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727, .addr = 0xff, .def_md_devad = 0, - .flags = (FLAGS_FAN_FAILURE_DET_REQ | - FLAGS_TX_ERROR_CHECK), + .flags = FLAGS_FAN_FAILURE_DET_REQ, .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index f74582a22c6..15f800085bb 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -407,8 +407,8 @@ u32 
bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type, opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET); opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); - opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) | - (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT)); + opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) | + (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN @@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp) if (!CHIP_IS_E1(bp)) { /* init leading/trailing edge */ if (IS_MF(bp)) { - val = (0xee0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xee0f | (1 << (BP_VN(bp) + 4))); if (bp->port.pmf) /* enable nig and gpio3 attention */ val |= 0x1100; @@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp) /* init leading/trailing edge */ if (IS_MF(bp)) { - val = (0xee0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xee0f | (1 << (BP_VN(bp) + 4))); if (bp->port.pmf) /* enable nig and gpio3 attention */ val |= 0x1100; @@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp) int vn; bp->vn_weight_sum = 0; - for (vn = VN_0; vn < E1HVN_MAX; vn++) { + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { u32 vn_cfg = bp->mf_config[vn]; u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100; @@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp) CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } +/* returns func by VN for current port */ +static inline int func_by_vn(struct bnx2x *bp, int vn) +{ + return 2 * vn + BP_PORT(bp); +} + static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn) { struct rate_shaping_vars_per_vn m_rs_vn; struct fairness_vars_per_vn m_fair_vn; u32 vn_cfg = bp->mf_config[vn]; - int func = 2*vn + BP_PORT(bp); + int func = func_by_vn(bp, vn); u16 vn_min_rate, vn_max_rate; int i; @@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp) * * and there are 2 functions per port */ - 
for (vn = VN_0; vn < E1HVN_MAX; vn++) { + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp); if (func >= E1H_FUNC_MAX) @@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type) /* calculate and set min-max rate for each vn */ if (bp->port.pmf) - for (vn = VN_0; vn < E1HVN_MAX; vn++) + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) bnx2x_init_vn_minmax(bp, vn); /* always enable rate shaping and fairness */ @@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type) static inline void bnx2x_link_sync_notify(struct bnx2x *bp) { - int port = BP_PORT(bp); int func; int vn; /* Set the attention towards other drivers on the same port */ - for (vn = VN_0; vn < E1HVN_MAX; vn++) { - if (vn == BP_E1HVN(bp)) + for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) { + if (vn == BP_VN(bp)) continue; - func = ((vn << 1) | port); + func = func_by_vn(bp, vn); REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 + (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1); } @@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp) bnx2x_dcbx_pmf_update(bp); /* enable nig attention */ - val = (0xff0f | (1 << (BP_E1HVN(bp) + 4))); + val = (0xff0f | (1 << (BP_VN(bp) + 4))); if (bp->common.int_block == INT_BLOCK_HC) { REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val); @@ -2756,8 +2761,14 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, u16 tpa_agg_size = 0; if (!fp->disable_tpa) { - pause->sge_th_hi = 250; - pause->sge_th_lo = 150; + pause->sge_th_lo = SGE_TH_LO(bp); + pause->sge_th_hi = SGE_TH_HI(bp); + + /* validate SGE ring has enough to cross high threshold */ + WARN_ON(bp->dropless_fc && + pause->sge_th_hi + FW_PREFETCH_CNT > + MAX_RX_SGE_CNT * NUM_RX_SGE_PAGES); + tpa_agg_size = min_t(u32, (min_t(u32, 8, MAX_SKB_FRAGS) * SGE_PAGE_SIZE * PAGES_PER_SGE), 0xffff); @@ -2771,10 +2782,21 @@ static void 
bnx2x_pf_rx_q_prep(struct bnx2x *bp, /* pause - not for e1 */ if (!CHIP_IS_E1(bp)) { - pause->bd_th_hi = 350; - pause->bd_th_lo = 250; - pause->rcq_th_hi = 350; - pause->rcq_th_lo = 250; + pause->bd_th_lo = BD_TH_LO(bp); + pause->bd_th_hi = BD_TH_HI(bp); + + pause->rcq_th_lo = RCQ_TH_LO(bp); + pause->rcq_th_hi = RCQ_TH_HI(bp); + /* + * validate that rings have enough entries to cross + * high thresholds + */ + WARN_ON(bp->dropless_fc && + pause->bd_th_hi + FW_PREFETCH_CNT > + bp->rx_ring_size); + WARN_ON(bp->dropless_fc && + pause->rcq_th_hi + FW_PREFETCH_CNT > + NUM_RCQ_RINGS * MAX_RCQ_DESC_CNT); pause->pri_map = 1; } @@ -2802,9 +2824,7 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, * For PF Clients it should be the maximum avaliable number. * VF driver(s) may want to define it to a smaller value. */ - rxq_init->max_tpa_queues = - (CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H_E2); + rxq_init->max_tpa_queues = MAX_AGG_QS(bp); rxq_init->cache_line_log = BNX2X_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; @@ -4808,6 +4828,37 @@ void bnx2x_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, hc_sm->time_to_expire = 0xFFFFFFFF; } + +/* allocates state machine ids. 
*/ +static inline +void bnx2x_map_sb_state_machines(struct hc_index_data *index_data) +{ + /* zero out state machine indices */ + /* rx indices */ + index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; + + /* tx indices */ + index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; + + /* map indices */ + /* rx indices */ + index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= + SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + + /* tx indices */ + index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; + index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= + SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT; +} + static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, u8 vf_valid, int fw_sb_id, int igu_sb_id) { @@ -4839,6 +4890,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (u32 *)&sb_data_e2; data_size = sizeof(struct hc_status_block_data_e2)/sizeof(u32); + bnx2x_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); @@ -4853,6 +4905,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid, hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (u32 *)&sb_data_e1x; data_size = sizeof(struct hc_status_block_data_e1x)/sizeof(u32); + bnx2x_map_sb_state_machines(sb_data_e1x.index_data); } bnx2x_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], @@ -4890,7 +4943,7 @@ static void bnx2x_init_def_sb(struct bnx2x *bp) int igu_seg_id; int port = BP_PORT(bp); int func = 
BP_FUNC(bp); - int reg_offset; + int reg_offset, reg_offset_en5; u64 section; int index; struct hc_sp_status_block_data sp_sb_data; @@ -4913,6 +4966,8 @@ static void bnx2x_init_def_sb(struct bnx2x *bp) reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0); + reg_offset_en5 = (port ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 : + MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0); for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { int sindex; /* take care of sig[0]..sig[4] */ @@ -4927,7 +4982,7 @@ static void bnx2x_init_def_sb(struct bnx2x *bp) * and not 16 between the different groups */ bp->attn_group[index].sig[4] = REG_RD(bp, - reg_offset + 0x10 + 0x4*index); + reg_offset_en5 + 0x4*index); else bp->attn_group[index].sig[4] = 0; } @@ -5802,7 +5857,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) * take the UNDI lock to protect undi_unload flow from accessing * registers while we're resetting the chip */ - bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); bnx2x_reset_common(bp); REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff); @@ -5814,7 +5869,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) } REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val); - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON); @@ -6671,12 +6726,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) if (CHIP_MODE_IS_4_PORT(bp)) dsb_idx = BP_FUNC(bp); else - dsb_idx = BP_E1HVN(bp); + dsb_idx = BP_VN(bp); prod_offset = (CHIP_INT_MODE_IS_BC(bp) ? 
IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); + /* + * igu prods come in chunks of E1HVN_MAX (4) - + * it does not matter what the current chip mode is + */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + @@ -7568,9 +7627,12 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode) u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; u8 *mac_addr = bp->dev->dev_addr; u32 val; + u16 pmc; + /* The mac address is written to entries 1-4 to - preserve entry 0 which is used by the PMF */ - u8 entry = (BP_E1HVN(bp) + 1)*8; + * preserve entry 0 which is used by the PMF + */ + u8 entry = (BP_VN(bp) + 1)*8; val = (mac_addr[0] << 8) | mac_addr[1]; EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val); @@ -7579,6 +7641,11 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode) (mac_addr[4] << 8) | mac_addr[5]; EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry + 4, val); + /* Enable the PME and clear the status */ + pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmc); + pmc |= PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS; + pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, pmc); + reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_EN; } else @@ -8546,10 +8613,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) /* Check if there is any driver already loaded */ val = REG_RD(bp, MISC_REG_UNPREPARED); if (val == 0x1) { - /* Check if it is the UNDI driver + + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); + /* + * Check if it is the UNDI driver * UNDI driver initializes CID offset for normal bell to 0x7 */ - bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); val = REG_RD(bp, DORQ_REG_NORM_CID_OFST); if (val == 0x7) { u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; @@ -8587,9 +8656,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) bnx2x_fw_command(bp, reset_code, 0); } - /* now it's safe to release the lock */ - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); -
bnx2x_undi_int_disable(bp); port = BP_PORT(bp); @@ -8639,8 +8705,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp) bp->fw_seq = (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); - } else - bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI); + } + + /* now it's safe to release the lock */ + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); } } @@ -8777,13 +8845,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp) static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp) { int pfid = BP_FUNC(bp); - int vn = BP_E1HVN(bp); int igu_sb_id; u32 val; u8 fid, igu_sb_cnt = 0; bp->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(bp)) { + int vn = BP_VN(bp); igu_sb_cnt = bp->igu_sb_cnt; bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) * FP_SB_MAX_E1x; @@ -9416,6 +9484,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bp->igu_base_sb = 0; } else { bp->common.int_block = INT_BLOCK_IGU; + + /* do not allow device reset during IGU info processing */ + bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET); + val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION); if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { @@ -9447,6 +9519,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bnx2x_get_igu_cam_info(bp); + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET); } /* @@ -9473,7 +9546,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) bp->mf_ov = 0; bp->mf_mode = 0; - vn = BP_E1HVN(bp); + vn = BP_VN(bp); if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) { BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n", @@ -9593,13 +9666,6 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp) /* port info */ bnx2x_get_port_hwinfo(bp); - if (!BP_NOMCP(bp)) { - bp->fw_seq = - (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & - DRV_MSG_SEQ_NUMBER_MASK); - BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq); - } - /* Get MAC addresses */ bnx2x_get_mac_hwinfo(bp); @@ -9765,6 +9831,14 @@ static int __devinit
bnx2x_init_bp(struct bnx2x *bp) if (!BP_NOMCP(bp)) bnx2x_undi_unload(bp); + /* init fw_seq after undi_unload! */ + if (!BP_NOMCP(bp)) { + bp->fw_seq = + (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & + DRV_MSG_SEQ_NUMBER_MASK); + BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq); + } + if (CHIP_REV_IS_FPGA(bp)) dev_err(&bp->pdev->dev, "FPGA detected\n"); @@ -10259,17 +10333,21 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, /* clean indirect addresses */ pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS, PCICFG_VENDOR_ID_OFFSET); - /* Clean the following indirect addresses for all functions since it + /* + * Clean the following indirect addresses for all functions since it * is not used by the driver. */ REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0); - REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0); + + if (CHIP_IS_E1x(bp)) { + REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0); + REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0); + } /* * Enable internal target-read (in case we are probed after PF FLR). diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index 40266c14e6d..fc7bd0f23c0 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -1384,6 +1384,18 @@ Latched ump_tx_parity; [31] MCP Latched scpad_parity; */ #define MISC_REG_AEU_ENABLE4_PXP_0 0xa108 #define MISC_REG_AEU_ENABLE4_PXP_1 0xa1a8 +/* [RW 32] fifth 32b for enabling the output for function 0 output0. 
Mapped + * as follows: [0] PGLUE config_space; [1] PGLUE misc_flr; [2] PGLUE B RBC + * attention [3] PGLUE B RBC parity; [4] ATC attention; [5] ATC parity; [6] + * mstat0 attention; [7] mstat0 parity; [8] mstat1 attention; [9] mstat1 + * parity; [31-10] Reserved; */ +#define MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0 0xa688 +/* [RW 32] Fifth 32b for enabling the output for function 1 output0. Mapped + * as follows: [0] PGLUE config_space; [1] PGLUE misc_flr; [2] PGLUE B RBC + * attention [3] PGLUE B RBC parity; [4] ATC attention; [5] ATC parity; [6] + * mstat0 attention; [7] mstat0 parity; [8] mstat1 attention; [9] mstat1 + * parity; [31-10] Reserved; */ +#define MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 0xa6b0 /* [RW 1] set/clr general attention 0; this will set/clr bit 94 in the aeu 128 bit vector */ #define MISC_REG_AEU_GENERAL_ATTN_0 0xa000 @@ -5320,7 +5332,7 @@ #define XCM_REG_XX_OVFL_EVNT_ID 0x20058 #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS (0x1<<0) #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS (0x1<<1) -#define XMAC_CTRL_REG_CORE_LOCAL_LPBK (0x1<<3) +#define XMAC_CTRL_REG_LINE_LOCAL_LPBK (0x1<<2) #define XMAC_CTRL_REG_RX_EN (0x1<<1) #define XMAC_CTRL_REG_SOFT_RESET (0x1<<6) #define XMAC_CTRL_REG_TX_EN (0x1<<0) @@ -5766,7 +5778,7 @@ #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0 8 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_1 9 #define HW_LOCK_RESOURCE_SPIO 2 -#define HW_LOCK_RESOURCE_UNDI 5 +#define HW_LOCK_RESOURCE_RESET 5 #define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT (0x1<<4) #define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR (0x1<<5) #define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR (0x1<<18) @@ -6853,6 +6865,9 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_WC_REG_IEEE0BLK_AUTONEGNP 0x7 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT0 0x10 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1 0x11 +#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2 0x12 +#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY 0x4000 +#define 
MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ 0x8000 #define MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150 0x96 #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL 0x8000 #define MDIO_WC_REG_XGXSBLK0_MISCCONTROL1 0x800e diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c index 771f6803b23..9908f2bbcf7 100644 --- a/drivers/net/bnx2x/bnx2x_stats.c +++ b/drivers/net/bnx2x/bnx2x_stats.c @@ -710,7 +710,8 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp) break; case MAC_TYPE_NONE: /* unreached */ - BNX2X_ERR("stats updated by DMAE but no MAC active\n"); + DP(BNX2X_MSG_STATS, + "stats updated by DMAE but no MAC active\n"); return -1; default: /* unreached */ @@ -1391,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp) static void bnx2x_func_stats_base_init(struct bnx2x *bp) { - int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX; + int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX; u32 func_stx; /* sanity */ @@ -1404,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp) func_stx = bp->func_stx; for (vn = VN_0; vn < vn_max; vn++) { - int mb_idx = CHIP_IS_E1x(bp) ? 
2*vn + BP_PORT(bp) : vn; + int mb_idx = BP_FW_MB_IDX_VN(bp, vn); bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param); bnx2x_func_stats_init(bp); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a047eb973e3..47b928ed08f 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2168,7 +2168,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } re_arm: - queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); + if (!bond->kill_timers) + queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); out: read_unlock(&bond->lock); } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7f8b20a34ee..d4fbd2e6261 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1440,7 +1440,8 @@ void bond_alb_monitor(struct work_struct *work) } re_arm: - queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); + if (!bond->kill_timers) + queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); out: read_unlock(&bond->lock); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 43f2ea54108..de3d351ccb6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -777,6 +777,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) read_lock(&bond->lock); + if (bond->kill_timers) + goto out; + /* rejoin all groups on bond device */ __bond_resend_igmp_join_requests(bond->dev); @@ -790,9 +793,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) __bond_resend_igmp_join_requests(vlan_dev); } - if (--bond->igmp_retrans > 0) + if ((--bond->igmp_retrans > 0) && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - +out: read_unlock(&bond->lock); } @@ -1432,6 +1435,8 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) struct sk_buff *skb = *pskb; struct slave *slave; struct bonding *bond; + void 
(*recv_probe)(struct sk_buff *, struct bonding *, + struct slave *); skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) @@ -1445,11 +1450,12 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) if (bond->params.arp_interval) slave->dev->last_rx = jiffies; - if (bond->recv_probe) { + recv_probe = ACCESS_ONCE(bond->recv_probe); + if (recv_probe) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (likely(nskb)) { - bond->recv_probe(nskb, bond, slave); + recv_probe(nskb, bond, slave); dev_kfree_skb(nskb); } } @@ -2538,7 +2544,7 @@ void bond_mii_monitor(struct work_struct *work) } re_arm: - if (bond->params.miimon) + if (bond->params.miimon && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->mii_work, msecs_to_jiffies(bond->params.miimon)); out: @@ -2886,7 +2892,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) } re_arm: - if (bond->params.arp_interval) + if (bond->params.arp_interval && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); out: read_unlock(&bond->lock); @@ -3154,7 +3160,7 @@ void bond_activebackup_arp_mon(struct work_struct *work) bond_ab_arp_probe(bond); re_arm: - if (bond->params.arp_interval) + if (bond->params.arp_interval && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); out: read_unlock(&bond->lock); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 92feac68b66..4cc6f44c2ba 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -261,11 +261,13 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev) void __iomem *data = &regs->tx.dsr1_0; u16 *payload = (u16 *)frame->data; - /* It is safe to write into dsr[dlc+1] */ - for (i = 0; i < (frame->can_dlc + 1) / 2; i++) { + for (i = 0; i < frame->can_dlc / 2; i++) { out_be16(data, *payload++); data += 2 + _MSCAN_RESERVED_DSR_SIZE; } + /* write remaining byte if necessary */ + if (frame->can_dlc & 1) +
out_8(data, frame->data[frame->can_dlc - 1]); } out_8(&regs->tx.dlr, frame->can_dlc); @@ -330,10 +332,13 @@ static void mscan_get_rx_frame(struct net_device *dev, struct can_frame *frame) void __iomem *data = &regs->rx.dsr1_0; u16 *payload = (u16 *)frame->data; - for (i = 0; i < (frame->can_dlc + 1) / 2; i++) { + for (i = 0; i < frame->can_dlc / 2; i++) { *payload++ = in_be16(data); data += 2 + _MSCAN_RESERVED_DSR_SIZE; } + /* read remaining byte if necessary */ + if (frame->can_dlc & 1) + frame->data[frame->can_dlc - 1] = in_8(data); } out_8(&regs->canrflg, MSCAN_RXF); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index a81249246ec..2adc294f512 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -46,6 +46,7 @@ #include <linux/skbuff.h> #include <linux/platform_device.h> #include <linux/clk.h> +#include <linux/io.h> #include <linux/can/dev.h> #include <linux/can/error.h> diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index 805076c54f1..da5a5d9b8af 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -1146,12 +1146,14 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) if (te && te->ctx && te->client && te->client->redirect) { update_tcb = te->client->redirect(te->ctx, old, new, e); if (update_tcb) { + rcu_read_lock(); l2t_hold(L2DATA(tdev), e); + rcu_read_unlock(); set_l2t_ix(tdev, tid, e); } } } - l2t_release(L2DATA(tdev), e); + l2t_release(tdev, e); } /* @@ -1264,7 +1266,7 @@ int cxgb3_offload_activate(struct adapter *adapter) goto out_free; err = -ENOMEM; - L2DATA(dev) = t3_init_l2t(l2t_capacity); + RCU_INIT_POINTER(dev->l2opt, t3_init_l2t(l2t_capacity)); if (!L2DATA(dev)) goto out_free; @@ -1298,16 +1300,24 @@ int cxgb3_offload_activate(struct adapter *adapter) out_free_l2t: t3_free_l2t(L2DATA(dev)); - L2DATA(dev) = NULL; + rcu_assign_pointer(dev->l2opt, NULL); out_free: kfree(t); return err; } +static void clean_l2_data(struct 
rcu_head *head) +{ + struct l2t_data *d = container_of(head, struct l2t_data, rcu_head); + t3_free_l2t(d); +} + + void cxgb3_offload_deactivate(struct adapter *adapter) { struct t3cdev *tdev = &adapter->tdev; struct t3c_data *t = T3C_DATA(tdev); + struct l2t_data *d; remove_adapter(adapter); if (list_empty(&adapter_list)) @@ -1315,8 +1325,11 @@ void cxgb3_offload_deactivate(struct adapter *adapter) free_tid_maps(&t->tid_maps); T3C_DATA(tdev) = NULL; - t3_free_l2t(L2DATA(tdev)); - L2DATA(tdev) = NULL; + rcu_read_lock(); + d = L2DATA(tdev); + rcu_read_unlock(); + rcu_assign_pointer(tdev->l2opt, NULL); + call_rcu(&d->rcu_head, clean_l2_data); if (t->nofail_skb) kfree_skb(t->nofail_skb); kfree(t); diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c index f452c400325..41540978a17 100644 --- a/drivers/net/cxgb3/l2t.c +++ b/drivers/net/cxgb3/l2t.c @@ -300,14 +300,21 @@ static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh) struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh, struct net_device *dev) { - struct l2t_entry *e; - struct l2t_data *d = L2DATA(cdev); + struct l2t_entry *e = NULL; + struct l2t_data *d; + int hash; u32 addr = *(u32 *) neigh->primary_key; int ifidx = neigh->dev->ifindex; - int hash = arp_hash(addr, ifidx, d); struct port_info *p = netdev_priv(dev); int smt_idx = p->port_id; + rcu_read_lock(); + d = L2DATA(cdev); + if (!d) + goto done_rcu; + + hash = arp_hash(addr, ifidx, d); + write_lock_bh(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) if (e->addr == addr && e->ifindex == ifidx && @@ -338,6 +345,8 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh, } done: write_unlock_bh(&d->lock); +done_rcu: + rcu_read_unlock(); return e; } diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h index 7a12d52ed4f..c5f54796e2c 100644 --- a/drivers/net/cxgb3/l2t.h +++ b/drivers/net/cxgb3/l2t.h @@ -76,6 +76,7 @@ struct l2t_data { atomic_t nfree; /* number of free 
entries */ rwlock_t lock; struct l2t_entry l2tab[0]; + struct rcu_head rcu_head; /* to handle rcu cleanup */ }; typedef void (*arp_failure_handler_func)(struct t3cdev * dev, @@ -99,7 +100,7 @@ static inline void set_arp_failure_handler(struct sk_buff *skb, /* * Getting to the L2 data from an offload device. */ -#define L2DATA(dev) ((dev)->l2opt) +#define L2DATA(cdev) (rcu_dereference((cdev)->l2opt)) #define W_TCB_L2T_IX 0 #define S_TCB_L2T_IX 7 @@ -126,15 +127,22 @@ static inline int l2t_send(struct t3cdev *dev, struct sk_buff *skb, return t3_l2t_send_slow(dev, skb, e); } -static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e) +static inline void l2t_release(struct t3cdev *t, struct l2t_entry *e) { - if (atomic_dec_and_test(&e->refcnt)) + struct l2t_data *d; + + rcu_read_lock(); + d = L2DATA(t); + + if (atomic_dec_and_test(&e->refcnt) && d) t3_l2e_free(d, e); + + rcu_read_unlock(); } static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { - if (atomic_add_return(1, &e->refcnt) == 1) /* 0 -> 1 transition */ + if (d && atomic_add_return(1, &e->refcnt) == 1) /* 0 -> 1 transition */ atomic_dec(&d->nfree); } diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c index c9957b7f17b..b4efa292fd6 100644 --- a/drivers/net/cxgb4/cxgb4_main.c +++ b/drivers/net/cxgb4/cxgb4_main.c @@ -3712,6 +3712,9 @@ static int __devinit init_one(struct pci_dev *pdev, setup_debugfs(adapter); } + /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ + pdev->needs_freset = 1; + if (is_offload(adapter)) attach_ulds(adapter); diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index 8545c7aa93e..a5a89ecb6f3 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -4026,6 +4026,12 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) checksum += eeprom_data; } +#ifdef CONFIG_PARISC + /* This is a signature and not a checksum on HP c8000 */ + if ((hw->subsystem_vendor_id == 0x103C) 
&& (eeprom_data == 0x16d6)) + return E1000_SUCCESS; + +#endif if (checksum == (u16) EEPROM_SUM) return E1000_SUCCESS; else { diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index 25a8c2adb00..0caf3c323ec 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c @@ -1669,10 +1669,10 @@ static int gfar_get_cls_all(struct gfar_private *priv, u32 i = 0; list_for_each_entry(comp, &priv->rx_list.list, list) { - if (i <= cmd->rule_cnt) { - rule_locs[i] = comp->fs.location; - i++; - } + if (i == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[i] = comp->fs.location; + i++; } cmd->data = MAX_FILER_IDX; diff --git a/drivers/net/greth.c b/drivers/net/greth.c index 16ce45c1193..52a39000c42 100644 --- a/drivers/net/greth.c +++ b/drivers/net/greth.c @@ -428,6 +428,7 @@ greth_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_sync_single_for_device(greth->dev, dma_addr, skb->len, DMA_TO_DEVICE); status = GRETH_BD_EN | GRETH_BD_IE | (skb->len & GRETH_BD_LEN); + greth->tx_bufs_length[greth->tx_next] = skb->len & GRETH_BD_LEN; /* Wrap around descriptor ring */ if (greth->tx_next == GRETH_TXBD_NUM_MASK) { @@ -490,7 +491,8 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) if (nr_frags != 0) status = GRETH_TXBD_MORE; - status |= GRETH_TXBD_CSALL; + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= GRETH_TXBD_CSALL; status |= skb_headlen(skb) & GRETH_BD_LEN; if (greth->tx_next == GRETH_TXBD_NUM_MASK) status |= GRETH_BD_WR; @@ -513,7 +515,9 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) greth->tx_skbuff[curr_tx] = NULL; bdp = greth->tx_bd_base + curr_tx; - status = GRETH_TXBD_CSALL | GRETH_BD_EN; + status = GRETH_BD_EN; + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= GRETH_TXBD_CSALL; status |= frag->size & GRETH_BD_LEN; /* Wrap around descriptor ring */ @@ -641,6 +645,7 @@ static void greth_clean_tx(struct net_device *dev) dev->stats.tx_fifo_errors++; } dev->stats.tx_packets++; + 
dev->stats.tx_bytes += greth->tx_bufs_length[greth->tx_last]; greth->tx_last = NEXT_TX(greth->tx_last); greth->tx_free++; } @@ -695,6 +700,7 @@ static void greth_clean_tx_gbit(struct net_device *dev) greth->tx_skbuff[greth->tx_last] = NULL; greth_update_tx_stats(dev, stat); + dev->stats.tx_bytes += skb->len; bdp = greth->tx_bd_base + greth->tx_last; @@ -796,6 +802,7 @@ static int greth_rx(struct net_device *dev, int limit) memcpy(skb_put(skb, pkt_len), phys_to_virt(dma_addr), pkt_len); skb->protocol = eth_type_trans(skb, dev); + dev->stats.rx_bytes += pkt_len; dev->stats.rx_packets++; netif_receive_skb(skb); } @@ -910,6 +917,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; netif_receive_skb(skb); greth->rx_skbuff[greth->rx_cur] = newskb; diff --git a/drivers/net/greth.h b/drivers/net/greth.h index 9a0040dee4d..232a622a85b 100644 --- a/drivers/net/greth.h +++ b/drivers/net/greth.h @@ -103,6 +103,7 @@ struct greth_private { unsigned char *tx_bufs[GRETH_TXBD_NUM]; unsigned char *rx_bufs[GRETH_RXBD_NUM]; + u16 tx_bufs_length[GRETH_TXBD_NUM]; u16 tx_next; u16 tx_last; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 3e667926940..d393f1e764e 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -636,8 +636,8 @@ static int ibmveth_open(struct net_device *netdev) netdev_err(netdev, "unable to request irq 0x%x, rc %d\n", netdev->irq, rc); do { - rc = h_free_logical_lan(adapter->vdev->unit_address); - } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); goto err_out; } @@ -757,7 +757,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) struct ibmveth_adapter *adapter = netdev_priv(dev); unsigned long set_attr, clr_attr, ret_attr; unsigned long set_attr6, clr_attr6; - long ret, ret6; + long ret, 
ret4, ret6; int rc1 = 0, rc2 = 0; int restart = 0; @@ -770,6 +770,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) set_attr = 0; clr_attr = 0; + set_attr6 = 0; + clr_attr6 = 0; if (data) { set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; @@ -784,16 +786,20 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) && !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) && (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) { - ret = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, set_attr, &ret_attr); - if (ret != H_SUCCESS) { + if (ret4 != H_SUCCESS) { netdev_err(dev, "unable to change IPv4 checksum " "offload settings. %d rc=%ld\n", - data, ret); + data, ret4); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IP_CSUM; - ret = h_illan_attributes(adapter->vdev->unit_address, - set_attr, clr_attr, &ret_attr); } else { adapter->fw_ipv4_csum_support = data; } @@ -804,15 +810,18 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) if (ret6 != H_SUCCESS) { netdev_err(dev, "unable to change IPv6 checksum " "offload settings. 
%d rc=%ld\n", - data, ret); + data, ret6); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr6, clr_attr6, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IPV6_CSUM; - ret = h_illan_attributes(adapter->vdev->unit_address, - set_attr6, clr_attr6, - &ret_attr); } else adapter->fw_ipv6_csum_support = data; - if (ret != H_SUCCESS || ret6 != H_SUCCESS) + if (ret4 == H_SUCCESS || ret6 == H_SUCCESS) adapter->rx_csum = data; else rc1 = -EIO; @@ -930,6 +939,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, union ibmveth_buf_desc descs[6]; int last, i; int force_bounce = 0; + dma_addr_t dma_addr; /* * veth handles a maximum of 6 segments including the header, so @@ -994,17 +1004,16 @@ retry_bounce: } /* Map the header */ - descs[0].fields.address = dma_map_single(&adapter->vdev->dev, skb->data, - skb_headlen(skb), - DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->vdev->dev, descs[0].fields.address)) + dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto map_failed; descs[0].fields.flags_len = desc_flags | skb_headlen(skb); + descs[0].fields.address = dma_addr; /* Map the frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - unsigned long dma_addr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr = dma_map_page(&adapter->vdev->dev, frag->page, @@ -1026,7 +1035,12 @@ retry_bounce: netdev->stats.tx_bytes += skb->len; } - for (i = 0; i < skb_shinfo(skb)->nr_frags + 1; i++) + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); + + for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 22790394318..e1fcc958927 100644 --- 
a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1321,8 +1321,8 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (ring_is_rsc_enabled(rx_ring)) pkt_is_rsc = ixgbe_get_rsc_state(rx_desc); - /* if this is a skb from previous receive DMA will be 0 */ - if (rx_buffer_info->dma) { + /* linear means we are building an skb from multiple pages */ + if (!skb_is_nonlinear(skb)) { u16 hlen; if (pkt_is_rsc && !(staterr & IXGBE_RXD_STAT_EOP) && diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 05172c39a0c..376e3e94bae 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -239,7 +239,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) dest = macvlan_hash_lookup(port, eth->h_dest); if (dest && dest->mode == MACVLAN_MODE_BRIDGE) { /* send to lowerdev first for its network taps */ - vlan->forward(vlan->lowerdev, skb); + dev_forward_skb(vlan->lowerdev, skb); return NET_XMIT_SUCCESS; } diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 6e03de034ac..f76ab6bf309 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -172,7 +172,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; - ring->doorbell_qpn = swab32(ring->qp.qpn << 8); + ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, &ring->context); @@ -791,7 +791,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) { - *(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn; + *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ @@ -812,7 +812,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff 
*skb, struct net_device *dev) wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); - writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); + iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); } /* Poll CQ here */ diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index dfc82720065..e8882023576 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -307,6 +307,11 @@ static ssize_t store_enabled(struct netconsole_target *nt, return err; if (enabled < 0 || enabled > 1) return -EINVAL; + if (enabled == nt->enabled) { + printk(KERN_INFO "netconsole: network logging has already %s\n", + nt->enabled ? "started" : "stopped"); + return -EINVAL; + } if (enabled) { /* 1 */ @@ -799,5 +804,11 @@ static void __exit cleanup_netconsole(void) } } -module_init(init_netconsole); +/* + * Use late_initcall to ensure netconsole is + * initialized after network device driver if built-in. + * + * late_initcall() and module_init() are identical if built as module. 
+ */ +late_initcall(init_netconsole); module_exit(cleanup_netconsole); diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h index 59fac77d0db..a09a07197eb 100644 --- a/drivers/net/pch_gbe/pch_gbe.h +++ b/drivers/net/pch_gbe/pch_gbe.h @@ -127,8 +127,8 @@ struct pch_gbe_regs { /* Reset */ #define PCH_GBE_ALL_RST 0x80000000 /* All reset */ -#define PCH_GBE_TX_RST 0x40000000 /* TX MAC, TX FIFO, TX DMA reset */ -#define PCH_GBE_RX_RST 0x04000000 /* RX MAC, RX FIFO, RX DMA reset */ +#define PCH_GBE_TX_RST 0x00008000 /* TX MAC, TX FIFO, TX DMA reset */ +#define PCH_GBE_RX_RST 0x00004000 /* RX MAC, RX FIFO, RX DMA reset */ /* TCP/IP Accelerator Control */ #define PCH_GBE_EX_LIST_EN 0x00000008 /* External List Enable */ @@ -276,6 +276,9 @@ struct pch_gbe_regs { #define PCH_GBE_RX_DMA_EN 0x00000002 /* Enables Receive DMA */ #define PCH_GBE_TX_DMA_EN 0x00000001 /* Enables Transmission DMA */ +/* RX DMA STATUS */ +#define PCH_GBE_IDLE_CHECK 0xFFFFFFFE + /* Wake On LAN Status */ #define PCH_GBE_WLS_BR 0x00000008 /* Broadcas Address */ #define PCH_GBE_WLS_MLT 0x00000004 /* Multicast Address */ @@ -471,6 +474,7 @@ struct pch_gbe_tx_desc { struct pch_gbe_buffer { struct sk_buff *skb; dma_addr_t dma; + unsigned char *rx_buffer; unsigned long time_stamp; u16 length; bool mapped; @@ -511,6 +515,9 @@ struct pch_gbe_tx_ring { struct pch_gbe_rx_ring { struct pch_gbe_rx_desc *desc; dma_addr_t dma; + unsigned char *rx_buff_pool; + dma_addr_t rx_buff_pool_logic; + unsigned int rx_buff_pool_size; unsigned int size; unsigned int count; unsigned int next_to_use; @@ -622,6 +629,7 @@ struct pch_gbe_adapter { unsigned long rx_buffer_len; unsigned long tx_queue_len; bool have_msi; + bool rx_stop_flag; }; extern const char pch_driver_version[]; diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index eac3c5ca973..b8b4ba27b0e 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -20,7 +20,6 @@ #include 
"pch_gbe.h" #include "pch_gbe_api.h" -#include <linux/prefetch.h> #define DRV_VERSION "1.00" const char pch_driver_version[] = DRV_VERSION; @@ -34,11 +33,15 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */ #define PCH_GBE_COPYBREAK_DEFAULT 256 #define PCH_GBE_PCI_BAR 1 +#define PCH_GBE_RESERVE_MEMORY 0x200000 /* 2MB */ /* Macros for ML7223 */ #define PCI_VENDOR_ID_ROHM 0x10db #define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 +/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7831_GBE 0x8802 + #define PCH_GBE_TX_WEIGHT 64 #define PCH_GBE_RX_WEIGHT 64 #define PCH_GBE_RX_BUFFER_WRITE 16 @@ -52,6 +55,7 @@ const char pch_driver_version[] = DRV_VERSION; ) /* Ethertype field values */ +#define PCH_GBE_MAX_RX_BUFFER_SIZE 0x2880 #define PCH_GBE_MAX_JUMBO_FRAME_SIZE 10318 #define PCH_GBE_FRAME_SIZE_2048 2048 #define PCH_GBE_FRAME_SIZE_4096 4096 @@ -83,10 +87,12 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_INT_ENABLE_MASK ( \ PCH_GBE_INT_RX_DMA_CMPLT | \ PCH_GBE_INT_RX_DSC_EMP | \ + PCH_GBE_INT_RX_FIFO_ERR | \ PCH_GBE_INT_WOL_DET | \ PCH_GBE_INT_TX_CMPLT \ ) +#define PCH_GBE_INT_DISABLE_ALL 0 static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT; @@ -138,6 +144,27 @@ static void pch_gbe_wait_clr_bit(void *reg, u32 bit) if (!tmp) pr_err("Error: busy bit is not cleared\n"); } + +/** + * pch_gbe_wait_clr_bit_irq - Wait to clear a bit for interrupt context + * @reg: Pointer of register + * @busy: Busy bit + */ +static int pch_gbe_wait_clr_bit_irq(void *reg, u32 bit) +{ + u32 tmp; + int ret = -1; + /* wait busy */ + tmp = 20; + while ((ioread32(reg) & bit) && --tmp) + udelay(5); + if (!tmp) + pr_err("Error: busy bit is not cleared\n"); + else + ret = 0; + return ret; +} + /** * pch_gbe_mac_mar_set - Set MAC address register * @hw: Pointer to the HW structure @@ -189,6 +216,17 @@ static void pch_gbe_mac_reset_hw(struct pch_gbe_hw *hw) return; } +static void 
pch_gbe_mac_reset_rx(struct pch_gbe_hw *hw) +{ + /* Read the MAC address. and store to the private data */ + pch_gbe_mac_read_mac_addr(hw); + iowrite32(PCH_GBE_RX_RST, &hw->reg->RESET); + pch_gbe_wait_clr_bit_irq(&hw->reg->RESET, PCH_GBE_RX_RST); + /* Setup the MAC address */ + pch_gbe_mac_mar_set(hw, hw->mac.addr, 0); + return; +} + /** * pch_gbe_mac_init_rx_addrs - Initialize receive address's * @hw: Pointer to the HW structure @@ -671,13 +709,8 @@ static void pch_gbe_setup_rctl(struct pch_gbe_adapter *adapter) tcpip = ioread32(&hw->reg->TCPIP_ACC); - if (netdev->features & NETIF_F_RXCSUM) { - tcpip &= ~PCH_GBE_RX_TCPIPACC_OFF; - tcpip |= PCH_GBE_RX_TCPIPACC_EN; - } else { - tcpip |= PCH_GBE_RX_TCPIPACC_OFF; - tcpip &= ~PCH_GBE_RX_TCPIPACC_EN; - } + tcpip |= PCH_GBE_RX_TCPIPACC_OFF; + tcpip &= ~PCH_GBE_RX_TCPIPACC_EN; iowrite32(tcpip, &hw->reg->TCPIP_ACC); return; } @@ -717,13 +750,6 @@ static void pch_gbe_configure_rx(struct pch_gbe_adapter *adapter) iowrite32(rdba, &hw->reg->RX_DSC_BASE); iowrite32(rdlen, &hw->reg->RX_DSC_SIZE); iowrite32((rdba + rdlen), &hw->reg->RX_DSC_SW_P); - - /* Enables Receive DMA */ - rxdma = ioread32(&hw->reg->DMA_CTRL); - rxdma |= PCH_GBE_RX_DMA_EN; - iowrite32(rxdma, &hw->reg->DMA_CTRL); - /* Enables Receive */ - iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN); } /** @@ -1097,6 +1123,48 @@ void pch_gbe_update_stats(struct pch_gbe_adapter *adapter) spin_unlock_irqrestore(&adapter->stats_lock, flags); } +static void pch_gbe_stop_receive(struct pch_gbe_adapter *adapter) +{ + struct pch_gbe_hw *hw = &adapter->hw; + u32 rxdma; + u16 value; + int ret; + + /* Disable Receive DMA */ + rxdma = ioread32(&hw->reg->DMA_CTRL); + rxdma &= ~PCH_GBE_RX_DMA_EN; + iowrite32(rxdma, &hw->reg->DMA_CTRL); + /* Wait Rx DMA BUS is IDLE */ + ret = pch_gbe_wait_clr_bit_irq(&hw->reg->RX_DMA_ST, PCH_GBE_IDLE_CHECK); + if (ret) { + /* Disable Bus master */ + pci_read_config_word(adapter->pdev, PCI_COMMAND, &value); + value &= ~PCI_COMMAND_MASTER; + 
pci_write_config_word(adapter->pdev, PCI_COMMAND, value); + /* Stop Receive */ + pch_gbe_mac_reset_rx(hw); + /* Enable Bus master */ + value |= PCI_COMMAND_MASTER; + pci_write_config_word(adapter->pdev, PCI_COMMAND, value); + } else { + /* Stop Receive */ + pch_gbe_mac_reset_rx(hw); + } +} + +static void pch_gbe_start_receive(struct pch_gbe_hw *hw) +{ + u32 rxdma; + + /* Enables Receive DMA */ + rxdma = ioread32(&hw->reg->DMA_CTRL); + rxdma |= PCH_GBE_RX_DMA_EN; + iowrite32(rxdma, &hw->reg->DMA_CTRL); + /* Enables Receive */ + iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN); + return; +} + /** * pch_gbe_intr - Interrupt Handler * @irq: Interrupt number @@ -1123,7 +1191,17 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) if (int_st & PCH_GBE_INT_RX_FRAME_ERR) adapter->stats.intr_rx_frame_err_count++; if (int_st & PCH_GBE_INT_RX_FIFO_ERR) - adapter->stats.intr_rx_fifo_err_count++; + if (!adapter->rx_stop_flag) { + adapter->stats.intr_rx_fifo_err_count++; + pr_debug("Rx fifo over run\n"); + adapter->rx_stop_flag = true; + int_en = ioread32(&hw->reg->INT_EN); + iowrite32((int_en & ~PCH_GBE_INT_RX_FIFO_ERR), + &hw->reg->INT_EN); + pch_gbe_stop_receive(adapter); + int_st |= ioread32(&hw->reg->INT_ST); + int_st = int_st & ioread32(&hw->reg->INT_EN); + } if (int_st & PCH_GBE_INT_RX_DMA_ERR) adapter->stats.intr_rx_dma_err_count++; if (int_st & PCH_GBE_INT_TX_FIFO_ERR) @@ -1135,21 +1213,18 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) /* When Rx descriptor is empty */ if ((int_st & PCH_GBE_INT_RX_DSC_EMP)) { adapter->stats.intr_rx_dsc_empty_count++; - pr_err("Rx descriptor is empty\n"); + pr_debug("Rx descriptor is empty\n"); int_en = ioread32(&hw->reg->INT_EN); iowrite32((int_en & ~PCH_GBE_INT_RX_DSC_EMP), &hw->reg->INT_EN); if (hw->mac.tx_fc_enable) { /* Set Pause packet */ pch_gbe_mac_set_pause_packet(hw); } - if ((int_en & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT)) - == 0) { - return IRQ_HANDLED; - } } /* When request status is Receive 
interruption */ - if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT))) { + if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT)) || + (adapter->rx_stop_flag == true)) { if (likely(napi_schedule_prep(&adapter->napi))) { /* Enable only Rx Descriptor empty */ atomic_inc(&adapter->irq_sem); @@ -1185,29 +1260,23 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter, unsigned int i; unsigned int bufsz; - bufsz = adapter->rx_buffer_len + PCH_GBE_DMA_ALIGN; + bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; i = rx_ring->next_to_use; while ((cleaned_count--)) { buffer_info = &rx_ring->buffer_info[i]; - skb = buffer_info->skb; - if (skb) { - skb_trim(skb, 0); - } else { - skb = netdev_alloc_skb(netdev, bufsz); - if (unlikely(!skb)) { - /* Better luck next round */ - adapter->stats.rx_alloc_buff_failed++; - break; - } - /* 64byte align */ - skb_reserve(skb, PCH_GBE_DMA_ALIGN); - - buffer_info->skb = skb; - buffer_info->length = adapter->rx_buffer_len; + skb = netdev_alloc_skb(netdev, bufsz); + if (unlikely(!skb)) { + /* Better luck next round */ + adapter->stats.rx_alloc_buff_failed++; + break; } + /* align */ + skb_reserve(skb, NET_IP_ALIGN); + buffer_info->skb = skb; + buffer_info->dma = dma_map_single(&pdev->dev, - skb->data, + buffer_info->rx_buffer, buffer_info->length, DMA_FROM_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { @@ -1240,6 +1309,36 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter, return; } +static int +pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter, + struct pch_gbe_rx_ring *rx_ring, int cleaned_count) +{ + struct pci_dev *pdev = adapter->pdev; + struct pch_gbe_buffer *buffer_info; + unsigned int i; + unsigned int bufsz; + unsigned int size; + + bufsz = adapter->rx_buffer_len; + + size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY; + rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size, + &rx_ring->rx_buff_pool_logic, + GFP_KERNEL); + if (!rx_ring->rx_buff_pool) { + 
pr_err("Unable to allocate memory for the receive poll buffer\n"); + return -ENOMEM; + } + memset(rx_ring->rx_buff_pool, 0, size); + rx_ring->rx_buff_pool_size = size; + for (i = 0; i < rx_ring->count; i++) { + buffer_info = &rx_ring->buffer_info[i]; + buffer_info->rx_buffer = rx_ring->rx_buff_pool + bufsz * i; + buffer_info->length = bufsz; + } + return 0; +} + /** * pch_gbe_alloc_tx_buffers - Allocate transmit buffers * @adapter: Board private structure @@ -1285,7 +1384,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, struct sk_buff *skb; unsigned int i; unsigned int cleaned_count = 0; - bool cleaned = false; + bool cleaned = true; pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean); @@ -1296,7 +1395,6 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, while ((tx_desc->gbec_status & DSC_INIT16) == 0x0000) { pr_debug("gbec_status:0x%04x\n", tx_desc->gbec_status); - cleaned = true; buffer_info = &tx_ring->buffer_info[i]; skb = buffer_info->skb; @@ -1339,8 +1437,10 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, tx_desc = PCH_GBE_TX_DESC(*tx_ring, i); /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (cleaned_count++ == PCH_GBE_TX_WEIGHT) + if (cleaned_count++ == PCH_GBE_TX_WEIGHT) { + cleaned = false; break; + } } pr_debug("called pch_gbe_unmap_and_free_tx_resource() %d count\n", cleaned_count); @@ -1380,7 +1480,7 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, unsigned int i; unsigned int cleaned_count = 0; bool cleaned = false; - struct sk_buff *skb, *new_skb; + struct sk_buff *skb; u8 dma_status; u16 gbec_status; u32 tcp_ip_status; @@ -1401,13 +1501,12 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, rx_desc->gbec_status = DSC_INIT16; buffer_info = &rx_ring->buffer_info[i]; skb = buffer_info->skb; + buffer_info->skb = NULL; /* unmap dma */ dma_unmap_single(&pdev->dev, buffer_info->dma, buffer_info->length, DMA_FROM_DEVICE); buffer_info->mapped = false; - /* Prefetch the packet */ - prefetch(skb->data); 
pr_debug("RxDecNo = 0x%04x Status[DMA:0x%02x GBE:0x%04x " "TCP:0x%08x] BufInf = 0x%p\n", @@ -1427,70 +1526,16 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, pr_err("Receive CRC Error\n"); } else { /* get receive length */ - /* length convert[-3] */ - length = (rx_desc->rx_words_eob) - 3; - - /* Decide the data conversion method */ - if (!(netdev->features & NETIF_F_RXCSUM)) { - /* [Header:14][payload] */ - if (NET_IP_ALIGN) { - /* Because alignment differs, - * the new_skb is newly allocated, - * and data is copied to new_skb.*/ - new_skb = netdev_alloc_skb(netdev, - length + NET_IP_ALIGN); - if (!new_skb) { - /* dorrop error */ - pr_err("New skb allocation " - "Error\n"); - goto dorrop; - } - skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data, skb->data, - length); - skb = new_skb; - } else { - /* DMA buffer is used as SKB as it is.*/ - buffer_info->skb = NULL; - } - } else { - /* [Header:14][padding:2][payload] */ - /* The length includes padding length */ - length = length - PCH_GBE_DMA_PADDING; - if ((length < copybreak) || - (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) { - /* Because alignment differs, - * the new_skb is newly allocated, - * and data is copied to new_skb. 
- * Padding data is deleted - * at the time of a copy.*/ - new_skb = netdev_alloc_skb(netdev, - length + NET_IP_ALIGN); - if (!new_skb) { - /* dorrop error */ - pr_err("New skb allocation " - "Error\n"); - goto dorrop; - } - skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data, skb->data, - ETH_HLEN); - memcpy(&new_skb->data[ETH_HLEN], - &skb->data[ETH_HLEN + - PCH_GBE_DMA_PADDING], - length - ETH_HLEN); - skb = new_skb; - } else { - /* Padding data is deleted - * by moving header data.*/ - memmove(&skb->data[PCH_GBE_DMA_PADDING], - &skb->data[0], ETH_HLEN); - skb_reserve(skb, NET_IP_ALIGN); - buffer_info->skb = NULL; - } - } - /* The length includes FCS length */ - length = length - ETH_FCS_LEN; + /* length convert[-3], length includes FCS length */ + length = (rx_desc->rx_words_eob) - 3 - ETH_FCS_LEN; + if (rx_desc->rx_words_eob & 0x02) + length = length - 4; + /* + * buffer_info->rx_buffer: [Header:14][payload] + * skb->data: [Reserve:2][Header:14][payload] + */ + memcpy(skb->data, buffer_info->rx_buffer, length); + /* update status of driver */ adapter->stats.rx_bytes += length; adapter->stats.rx_packets++; @@ -1509,7 +1554,6 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, pr_debug("Receive skb->ip_summed: %d length: %d\n", skb->ip_summed, length); } -dorrop: /* return some buffers to hardware, one at a time is too slow */ if (unlikely(cleaned_count >= PCH_GBE_RX_BUFFER_WRITE)) { pch_gbe_alloc_rx_buffers(adapter, rx_ring, @@ -1714,9 +1758,15 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) pr_err("Error: can't bring device up\n"); return err; } + err = pch_gbe_alloc_rx_buffers_pool(adapter, rx_ring, rx_ring->count); + if (err) { + pr_err("Error: can't bring device up\n"); + return err; + } pch_gbe_alloc_tx_buffers(adapter, tx_ring); pch_gbe_alloc_rx_buffers(adapter, rx_ring, rx_ring->count); adapter->tx_queue_len = netdev->tx_queue_len; + pch_gbe_start_receive(&adapter->hw); mod_timer(&adapter->watchdog_timer, jiffies); @@ -1734,6 +1784,7 @@ int 
pch_gbe_up(struct pch_gbe_adapter *adapter) void pch_gbe_down(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; /* signal that we're down so the interrupt handler does not * reschedule our watchdog timer */ @@ -1752,6 +1803,12 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter) pch_gbe_reset(adapter); pch_gbe_clean_tx_ring(adapter, adapter->tx_ring); pch_gbe_clean_rx_ring(adapter, adapter->rx_ring); + + pci_free_consistent(adapter->pdev, rx_ring->rx_buff_pool_size, + rx_ring->rx_buff_pool, rx_ring->rx_buff_pool_logic); + rx_ring->rx_buff_pool_logic = 0; + rx_ring->rx_buff_pool_size = 0; + rx_ring->rx_buff_pool = NULL; } /** @@ -2004,6 +2061,8 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); int max_frame; + unsigned long old_rx_buffer_len = adapter->rx_buffer_len; + int err; max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || @@ -2018,14 +2077,24 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) else if (max_frame <= PCH_GBE_FRAME_SIZE_8192) adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_8192; else - adapter->rx_buffer_len = PCH_GBE_MAX_JUMBO_FRAME_SIZE; - netdev->mtu = new_mtu; - adapter->hw.mac.max_frame_size = max_frame; + adapter->rx_buffer_len = PCH_GBE_MAX_RX_BUFFER_SIZE; - if (netif_running(netdev)) - pch_gbe_reinit_locked(adapter); - else + if (netif_running(netdev)) { + pch_gbe_down(adapter); + err = pch_gbe_up(adapter); + if (err) { + adapter->rx_buffer_len = old_rx_buffer_len; + pch_gbe_up(adapter); + return -ENOMEM; + } else { + netdev->mtu = new_mtu; + adapter->hw.mac.max_frame_size = max_frame; + } + } else { pch_gbe_reset(adapter); + netdev->mtu = new_mtu; + adapter->hw.mac.max_frame_size = max_frame; + } pr_debug("max_frame : %d rx_buffer_len : %d mtu : %d max_frame_size : %d\n", max_frame, (u32) adapter->rx_buffer_len, 
netdev->mtu, @@ -2099,33 +2168,39 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) { struct pch_gbe_adapter *adapter = container_of(napi, struct pch_gbe_adapter, napi); - struct net_device *netdev = adapter->netdev; int work_done = 0; bool poll_end_flag = false; bool cleaned = false; + u32 int_en; pr_debug("budget : %d\n", budget); - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(netdev)) { - poll_end_flag = true; - } else { - cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); - pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); + pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); + cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); - if (cleaned) - work_done = budget; - /* If no Tx and not enough Rx work done, - * exit the polling mode - */ - if ((work_done < budget) || !netif_running(netdev)) - poll_end_flag = true; - } + if (!cleaned) + work_done = budget; + /* If no Tx and not enough Rx work done, + * exit the polling mode + */ + if (work_done < budget) + poll_end_flag = true; if (poll_end_flag) { napi_complete(napi); + if (adapter->rx_stop_flag) { + adapter->rx_stop_flag = false; + pch_gbe_start_receive(&adapter->hw); + } pch_gbe_irq_enable(adapter); - } + } else + if (adapter->rx_stop_flag) { + adapter->rx_stop_flag = false; + pch_gbe_start_receive(&adapter->hw); + int_en = ioread32(&adapter->hw.reg->INT_EN); + iowrite32((int_en | PCH_GBE_INT_RX_FIFO_ERR), + &adapter->hw.reg->INT_EN); + } pr_debug("poll_end_flag : %d work_done : %d budget : %d\n", poll_end_flag, work_done, budget); @@ -2452,6 +2527,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = { .class = (PCI_CLASS_NETWORK_ETHERNET << 8), .class_mask = (0xFFFF00) }, + {.vendor = PCI_VENDOR_ID_ROHM, + .device = PCI_DEVICE_ID_ROHM_ML7831_GBE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = (PCI_CLASS_NETWORK_ETHERNET << 8), + .class_mask = (0xFFFF00) + }, /* required last entry */ {0} }; diff 
--git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index cb6e0b486b1..edd7304773e 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -589,7 +589,7 @@ static void decode_rxts(struct dp83640_private *dp83640, prune_rx_ts(dp83640); if (list_empty(&dp83640->rxpool)) { - pr_warning("dp83640: rx timestamp pool is empty\n"); + pr_debug("dp83640: rx timestamp pool is empty\n"); goto out; } rxts = list_first_entry(&dp83640->rxpool, struct rxts, list); @@ -612,7 +612,7 @@ static void decode_txts(struct dp83640_private *dp83640, skb = skb_dequeue(&dp83640->tx_queue); if (!skb) { - pr_warning("dp83640: have timestamp but tx_queue empty\n"); + pr_debug("dp83640: have timestamp but tx_queue empty\n"); return; } ns = phy2txts(phy_txts); diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 10e5d985afa..edfa15d2e79 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1465,7 +1465,12 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) continue; } - mtu = pch->chan->mtu - hdrlen; + /* + * hdrlen includes the 2-byte PPP protocol field, but the + * MTU counts only the payload excluding the protocol field. + * (RFC1661 Section 2) + */ + mtu = pch->chan->mtu - (hdrlen - 2); if (mtu < 4) mtu = 4; if (flen > mtu) diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c index eae542a7e98..89f829f5f72 100644 --- a/drivers/net/pptp.c +++ b/drivers/net/pptp.c @@ -285,8 +285,10 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) ip_send_check(iph); ip_local_out(skb); + return 1; tx_error: + kfree_skb(skb); return 1; } @@ -305,11 +307,18 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) } header = (struct pptp_gre_header *)(skb->data); + headersize = sizeof(*header); /* test if acknowledgement present */ if (PPTP_GRE_IS_A(header->ver)) { - __u32 ack = (PPTP_GRE_IS_S(header->flags)) ? 
- header->ack : header->seq; /* ack in different place if S = 0 */ + __u32 ack; + + if (!pskb_may_pull(skb, headersize)) + goto drop; + header = (struct pptp_gre_header *)(skb->data); + + /* ack in different place if S = 0 */ + ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq; ack = ntohl(ack); @@ -318,21 +327,18 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) /* also handle sequence number wrap-around */ if (WRAPPED(ack, opt->ack_recv)) opt->ack_recv = ack; + } else { + headersize -= sizeof(header->ack); } - /* test if payload present */ if (!PPTP_GRE_IS_S(header->flags)) goto drop; - headersize = sizeof(*header); payload_len = ntohs(header->payload_len); seq = ntohl(header->seq); - /* no ack present? */ - if (!PPTP_GRE_IS_A(header->ver)) - headersize -= sizeof(header->ack); /* check for incomplete packet (length smaller than expected) */ - if (skb->len - headersize < payload_len) + if (!pskb_may_pull(skb, headersize + payload_len)) goto drop; payload = skb->data + headersize; diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c index 1a3033d8e7e..d17d0624c5e 100644 --- a/drivers/net/pxa168_eth.c +++ b/drivers/net/pxa168_eth.c @@ -40,6 +40,7 @@ #include <linux/clk.h> #include <linux/phy.h> #include <linux/io.h> +#include <linux/interrupt.h> #include <linux/types.h> #include <asm/pgtable.h> #include <asm/system.h> diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 02339b3352e..6d657cabb95 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -407,6 +407,7 @@ enum rtl_register_content { RxOK = 0x0001, /* RxStatusDesc */ + RxBOVF = (1 << 24), RxFOVF = (1 << 23), RxRWT = (1 << 22), RxRES = (1 << 21), @@ -682,6 +683,7 @@ struct rtl8169_private { struct mii_if_info mii; struct rtl8169_counters counters; u32 saved_wolopts; + u32 opts1_mask; struct rtl_fw { const struct firmware *fw; @@ -710,6 +712,7 @@ MODULE_FIRMWARE(FIRMWARE_8168D_1); MODULE_FIRMWARE(FIRMWARE_8168D_2); MODULE_FIRMWARE(FIRMWARE_8168E_1); 
MODULE_FIRMWARE(FIRMWARE_8168E_2); +MODULE_FIRMWARE(FIRMWARE_8168E_3); MODULE_FIRMWARE(FIRMWARE_8105E_1); static int rtl8169_open(struct net_device *dev); @@ -2856,7 +2859,7 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0004); rtl_writephy(tp, 0x1f, 0x0007); rtl_writephy(tp, 0x1e, 0x0020); - rtl_w1w0_phy(tp, 0x06, 0x0000, 0x0100); + rtl_w1w0_phy(tp, 0x15, 0x0000, 0x0100); rtl_writephy(tp, 0x1f, 0x0002); rtl_writephy(tp, 0x1f, 0x0000); rtl_writephy(tp, 0x0d, 0x0007); @@ -3077,6 +3080,14 @@ static void rtl8169_phy_reset(struct net_device *dev, netif_err(tp, link, dev, "PHY reset failed\n"); } +static bool rtl_tbi_enabled(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + + return (tp->mac_version == RTL_GIGA_MAC_VER_01) && + (RTL_R8(PHYstatus) & TBI_Enable); +} + static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; @@ -3109,7 +3120,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full : 0)); - if (RTL_R8(PHYstatus) & TBI_Enable) + if (rtl_tbi_enabled(tp)) netif_info(tp, link, dev, "TBI auto-negotiating\n"); } @@ -3305,6 +3316,37 @@ static void __devinit rtl_init_mdio_ops(struct rtl8169_private *tp) } } +static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_29: + case RTL_GIGA_MAC_VER_30: + case RTL_GIGA_MAC_VER_32: + case RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_34: + RTL_W32(RxConfig, RTL_R32(RxConfig) | + AcceptBroadcast | AcceptMulticast | AcceptMyPhys); + break; + default: + break; + } +} + +static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) +{ + if (!(__rtl8169_get_wol(tp) & WAKE_ANY)) + return false; + + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, MII_BMCR, 0x0000); + + rtl_wol_suspend_quirk(tp); + + return true; +} 
+ static void r810x_phy_power_down(struct rtl8169_private *tp) { rtl_writephy(tp, 0x1f, 0x0000); @@ -3319,11 +3361,8 @@ static void r810x_phy_power_up(struct rtl8169_private *tp) static void r810x_pll_power_down(struct rtl8169_private *tp) { - if (__rtl8169_get_wol(tp) & WAKE_ANY) { - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, MII_BMCR, 0x0000); + if (rtl_wol_pll_power_down(tp)) return; - } r810x_phy_power_down(tp); } @@ -3412,16 +3451,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) tp->mac_version == RTL_GIGA_MAC_VER_33) rtl_ephy_write(ioaddr, 0x19, 0xff64); - if (__rtl8169_get_wol(tp) & WAKE_ANY) { - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, MII_BMCR, 0x0000); - - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || - tp->mac_version == RTL_GIGA_MAC_VER_33) - RTL_W32(RxConfig, RTL_R32(RxConfig) | AcceptBroadcast | - AcceptMulticast | AcceptMyPhys); + if (rtl_wol_pll_power_down(tp)) return; - } r8168_phy_power_down(tp); @@ -3727,8 +3758,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->features |= rtl_try_msi(pdev, ioaddr, cfg); RTL_W8(Cfg9346, Cfg9346_Lock); - if ((tp->mac_version <= RTL_GIGA_MAC_VER_06) && - (RTL_R8(PHYstatus) & TBI_Enable)) { + if (rtl_tbi_enabled(tp)) { tp->set_speed = rtl8169_set_speed_tbi; tp->get_settings = rtl8169_gset_tbi; tp->phy_reset_enable = rtl8169_tbi_reset_enable; @@ -3777,6 +3807,9 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->intr_event = cfg->intr_event; tp->napi_event = cfg->napi_event; + tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? 
+ ~(RxBOVF | RxFOVF) : ~0; + init_timer(&tp->timer); tp->timer.data = (unsigned long) dev; tp->timer.function = rtl8169_phy_timer; @@ -3988,6 +4021,7 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) while (RTL_R8(TxPoll) & NPQ) udelay(20); } else if (tp->mac_version == RTL_GIGA_MAC_VER_34) { + RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq); while (!(RTL_R32(TxConfig) & TXCFG_EMPTY)) udelay(100); } else { @@ -5314,7 +5348,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev, u32 status; rmb(); - status = le32_to_cpu(desc->opts1); + status = le32_to_cpu(desc->opts1) & tp->opts1_mask; if (status & DescOwn) break; @@ -5766,11 +5800,30 @@ static const struct dev_pm_ops rtl8169_pm_ops = { #endif /* !CONFIG_PM */ +static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + + /* WoL fails with 8168b when the receiver is disabled. */ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_11: + case RTL_GIGA_MAC_VER_12: + case RTL_GIGA_MAC_VER_17: + pci_clear_master(tp->pci_dev); + + RTL_W8(ChipCmd, CmdRxEnb); + /* PCI commit */ + RTL_R8(ChipCmd); + break; + default: + break; + } +} + static void rtl_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rtl8169_private *tp = netdev_priv(dev); - void __iomem *ioaddr = tp->mmio_addr; rtl8169_net_suspend(dev); @@ -5784,16 +5837,9 @@ static void rtl_shutdown(struct pci_dev *pdev) spin_unlock_irq(&tp->lock); if (system_state == SYSTEM_POWER_OFF) { - /* WoL fails with 8168b when the receiver is disabled. 
*/ - if ((tp->mac_version == RTL_GIGA_MAC_VER_11 || - tp->mac_version == RTL_GIGA_MAC_VER_12 || - tp->mac_version == RTL_GIGA_MAC_VER_17) && - (tp->features & RTL_FEATURE_WOL)) { - pci_clear_master(pdev); - - RTL_W8(ChipCmd, CmdRxEnb); - /* PCI commit */ - RTL_R8(ChipCmd); + if (__rtl8169_get_wol(tp) & WAKE_ANY) { + rtl_wol_suspend_quirk(tp); + rtl_wol_shutdown_quirk(tp); } pci_wake_from_d3(pdev, true); diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index faca764aa21..b59abc706d9 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -1050,7 +1050,6 @@ static int efx_init_io(struct efx_nic *efx) { struct pci_dev *pci_dev = efx->pci_dev; dma_addr_t dma_mask = efx->type->max_dma_mask; - bool use_wc; int rc; netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); @@ -1101,21 +1100,8 @@ static int efx_init_io(struct efx_nic *efx) rc = -EIO; goto fail3; } - - /* bug22643: If SR-IOV is enabled then tx push over a write combined - * mapping is unsafe. We need to disable write combining in this case. - * MSI is unsupported when SR-IOV is enabled, and the firmware will - * have removed the MSI capability. So write combining is safe if - * there is an MSI capability. 
- */ - use_wc = (!EFX_WORKAROUND_22643(efx) || - pci_find_capability(pci_dev, PCI_CAP_ID_MSI)); - if (use_wc) - efx->membase = ioremap_wc(efx->membase_phys, - efx->type->mem_map_size); - else - efx->membase = ioremap_nocache(efx->membase_phys, - efx->type->mem_map_size); + efx->membase = ioremap_nocache(efx->membase_phys, + efx->type->mem_map_size); if (!efx->membase) { netif_err(efx, probe, efx->net_dev, "could not map memory BAR at %llx+%x\n", diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index cc978803d48..751d1ec112c 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -103,7 +103,6 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); #endif - wmb(); mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -126,7 +125,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, __raw_writel((__force u32)value->u32[0], membase + addr); __raw_writel((__force u32)value->u32[1], membase + addr + 4); #endif - wmb(); mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -141,7 +139,6 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value, /* No lock required */ _efx_writed(efx, value->u32[0], reg); - wmb(); } /* Read a 128-bit CSR, locking as appropriate. 
*/ @@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value, spin_lock_irqsave(&efx->biu_lock, flags); value->u32[0] = _efx_readd(efx, reg + 0); - rmb(); value->u32[1] = _efx_readd(efx, reg + 4); value->u32[2] = _efx_readd(efx, reg + 8); value->u32[3] = _efx_readd(efx, reg + 12); @@ -175,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase, value->u64[0] = (__force __le64)__raw_readq(membase + addr); #else value->u32[0] = (__force __le32)__raw_readl(membase + addr); - rmb(); value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4); #endif spin_unlock_irqrestore(&efx->biu_lock, flags); @@ -249,7 +244,6 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); #endif - wmb(); } #define efx_writeo_page(efx, value, reg, page) \ _efx_writeo_page(efx, value, \ diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c index 3dd45ed61f0..81a42539746 100644 --- a/drivers/net/sfc/mcdi.c +++ b/drivers/net/sfc/mcdi.c @@ -50,20 +50,6 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) return &nic_data->mcdi; } -static inline void -efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg) -{ - struct siena_nic_data *nic_data = efx->nic_data; - value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg); -} - -static inline void -efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg) -{ - struct siena_nic_data *nic_data = efx->nic_data; - __raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg); -} - void efx_mcdi_init(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; @@ -84,8 +70,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned pdu = MCDI_PDU(efx); - unsigned doorbell = MCDI_DOORBELL(efx); + unsigned pdu = 
FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx); unsigned int i; efx_dword_t hdr; u32 xflags, seqno; @@ -106,28 +92,29 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, MCDI_HEADER_SEQ, seqno, MCDI_HEADER_XFLAGS, xflags); - efx_mcdi_writed(efx, &hdr, pdu); + efx_writed(efx, &hdr, pdu); for (i = 0; i < inlen; i += 4) - efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i), - pdu + 4 + i); + _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); + + /* Ensure the payload is written out before the header */ + wmb(); /* ring the doorbell with a distinctive value */ - EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc); - efx_mcdi_writed(efx, &hdr, doorbell); + _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); } static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned int pdu = MCDI_PDU(efx); + unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); int i; BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT); BUG_ON(outlen & 3 || outlen >= 0x100); for (i = 0; i < outlen; i += 4) - efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i); + *((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i); } static int efx_mcdi_poll(struct efx_nic *efx) @@ -135,7 +122,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) struct efx_mcdi_iface *mcdi = efx_mcdi(efx); unsigned int time, finish; unsigned int respseq, respcmd, error; - unsigned int pdu = MCDI_PDU(efx); + unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); unsigned int rc, spins; efx_dword_t reg; @@ -161,7 +148,8 @@ static int efx_mcdi_poll(struct efx_nic *efx) time = get_seconds(); - efx_mcdi_readd(efx, ®, pdu); + rmb(); + efx_readd(efx, ®, pdu); /* All 1's indicates that shared memory is in reset (and is * not a valid header). 
Wait for it to come out reset before @@ -188,7 +176,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) respseq, mcdi->seqno); rc = EIO; } else if (error) { - efx_mcdi_readd(efx, ®, pdu + 4); + efx_readd(efx, ®, pdu + 4); switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) { #define TRANSLATE_ERROR(name) \ case MC_CMD_ERR_ ## name: \ @@ -222,21 +210,21 @@ out: /* Test and clear MC-rebooted flag for this port/function */ int efx_mcdi_poll_reboot(struct efx_nic *efx) { - unsigned int addr = MCDI_REBOOT_FLAG(efx); + unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx); efx_dword_t reg; uint32_t value; if (efx_nic_rev(efx) < EFX_REV_SIENA_A0) return false; - efx_mcdi_readd(efx, ®, addr); + efx_readd(efx, ®, addr); value = EFX_DWORD_FIELD(reg, EFX_DWORD_0); if (value == 0) return 0; EFX_ZERO_DWORD(reg); - efx_mcdi_writed(efx, ®, addr); + efx_writed(efx, ®, addr); if (value == MC_STATUS_DWORD_ASSERT) return -EINTR; diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c index bafa23a6874..3edfbaf5f02 100644 --- a/drivers/net/sfc/nic.c +++ b/drivers/net/sfc/nic.c @@ -1936,13 +1936,6 @@ void efx_nic_get_regs(struct efx_nic *efx, void *buf) size = min_t(size_t, table->step, 16); - if (table->offset >= efx->type->mem_map_size) { - /* No longer mapped; return dummy data */ - memcpy(buf, "\xde\xc0\xad\xde", 4); - buf += table->rows * size; - continue; - } - for (i = 0; i < table->rows; i++) { switch (table->step) { case 4: /* 32-bit register or SRAM */ diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h index 4bd1f2839df..7443f99c977 100644 --- a/drivers/net/sfc/nic.h +++ b/drivers/net/sfc/nic.h @@ -143,12 +143,10 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx) /** * struct siena_nic_data - Siena NIC state * @mcdi: Management-Controller-to-Driver Interface - * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable. 
* @wol_filter_id: Wake-on-LAN packet filter id */ struct siena_nic_data { struct efx_mcdi_iface mcdi; - void __iomem *mcdi_smem; int wol_filter_id; }; diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 5735e84c69d..2c3bd93fab5 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -250,26 +250,12 @@ static int siena_probe_nic(struct efx_nic *efx) efx_reado(efx, ®, FR_AZ_CS_DEBUG); efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; - /* Initialise MCDI */ - nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys + - FR_CZ_MC_TREG_SMEM, - FR_CZ_MC_TREG_SMEM_STEP * - FR_CZ_MC_TREG_SMEM_ROWS); - if (!nic_data->mcdi_smem) { - netif_err(efx, probe, efx->net_dev, - "could not map MCDI at %llx+%x\n", - (unsigned long long)efx->membase_phys + - FR_CZ_MC_TREG_SMEM, - FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS); - rc = -ENOMEM; - goto fail1; - } efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ rc = efx_mcdi_handle_assertion(efx); if (rc) - goto fail2; + goto fail1; /* Let the BMC know that the driver is now in charge of link and * filter settings. 
We must do this before we reset the NIC */ @@ -324,7 +310,6 @@ fail4: fail3: efx_mcdi_drv_attach(efx, false, NULL); fail2: - iounmap(nic_data->mcdi_smem); fail1: kfree(efx->nic_data); return rc; @@ -404,8 +389,6 @@ static int siena_init_nic(struct efx_nic *efx) static void siena_remove_nic(struct efx_nic *efx) { - struct siena_nic_data *nic_data = efx->nic_data; - efx_nic_free_buffer(efx, &efx->irq_status); siena_reset_hw(efx, RESET_TYPE_ALL); @@ -415,8 +398,7 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_drv_attach(efx, false, NULL); /* Tear down the private nic state */ - iounmap(nic_data->mcdi_smem); - kfree(nic_data); + kfree(efx->nic_data); efx->nic_data = NULL; } @@ -656,7 +638,8 @@ const struct efx_nic_type siena_a0_nic_type = { .default_mac_ops = &efx_mcdi_mac_operations, .revision = EFX_REV_SIENA_A0, - .mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */ + .mem_map_size = (FR_CZ_MC_TREG_SMEM + + FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS), .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL, .buf_tbl_base = FR_BZ_BUF_FULL_TBL, diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h index 99ff11400ce..e4dd3a7f304 100644 --- a/drivers/net/sfc/workarounds.h +++ b/drivers/net/sfc/workarounds.h @@ -38,8 +38,6 @@ #define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS /* Legacy interrupt storm when interrupt fifo fills */ #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA -/* Write combining and sriov=enabled are incompatible */ -#define EFX_WORKAROUND_22643 EFX_WORKAROUND_SIENA /* Spurious parity errors in TSORT buffers */ #define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index b9016a30cdc..c90ddb61cc5 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -26,6 +26,7 @@ * LAN9215, LAN9216, LAN9217, LAN9218 * LAN9210, LAN9211 * LAN9220, LAN9221 + * LAN89218 * */ @@ -1983,6 +1984,7 @@ static int 
__devinit smsc911x_init(struct net_device *dev) case 0x01170000: case 0x01160000: case 0x01150000: + case 0x218A0000: /* LAN911[5678] family */ pdata->generation = pdata->idrev & 0x0000FFFF; break; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index dc3fbf61910..c11a2b8327f 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -6234,12 +6234,10 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) } } -#ifdef BCM_KERNEL_SUPPORTS_8021Q if (vlan_tx_tag_present(skb)) { base_flags |= TXD_FLAG_VLAN; vlan = vlan_tx_tag_get(skb); } -#endif if (tg3_flag(tp, USE_JUMBO_BDFLAG) && !mss && skb->len > VLAN_ETH_FRAME_LEN) @@ -15579,7 +15577,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) cancel_work_sync(&tp->reset_task); - if (!tg3_flag(tp, USE_PHYLIB)) { + if (tg3_flag(tp, USE_PHYLIB)) { tg3_phy_fini(tp); tg3_mdio_fini(tp); } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 15772b1b6a9..13c1f044b40 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -59,6 +59,7 @@ #define USB_PRODUCT_IPHONE_3G 0x1292 #define USB_PRODUCT_IPHONE_3GS 0x1294 #define USB_PRODUCT_IPHONE_4 0x1297 +#define USB_PRODUCT_IPHONE_4_VZW 0x129c #define IPHETH_USBINTF_CLASS 255 #define IPHETH_USBINTF_SUBCLASS 253 @@ -98,6 +99,10 @@ static struct usb_device_id ipheth_table[] = { USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4, IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, IPHETH_USBINTF_PROTO) }, + { USB_DEVICE_AND_INTERFACE_INFO( + USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW, + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, + IPHETH_USBINTF_PROTO) }, { } }; MODULE_DEVICE_TABLE(usb, ipheth_table); diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c index 2d4c0910295..2d394af8217 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c @@ -41,7 +41,8 @@ static bool ar9002_hw_is_cal_supported(struct ath_hw *ah, case 
ADC_DC_CAL: /* Run ADC Gain Cal for non-CCK & non 2GHz-HT20 only */ if (!IS_CHAN_B(chan) && - !(IS_CHAN_2GHZ(chan) && IS_CHAN_HT20(chan))) + !((IS_CHAN_2GHZ(chan) || IS_CHAN_A_FAST_CLOCK(ah, chan)) && + IS_CHAN_HT20(chan))) supported = true; break; } diff --git a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h index 2339728a730..3e69c631ebb 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h @@ -1514,7 +1514,7 @@ static const u32 ar9300_2p2_mac_core[][2] = { {0x00008258, 0x00000000}, {0x0000825c, 0x40000000}, {0x00008260, 0x00080922}, - {0x00008264, 0x9bc00010}, + {0x00008264, 0x9d400010}, {0x00008268, 0xffffffff}, {0x0000826c, 0x0000ffff}, {0x00008270, 0x00000000}, diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 1baca8e4715..fcafec0605f 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -671,7 +671,7 @@ static int ar9003_hw_process_ini(struct ath_hw *ah, REG_WRITE_ARRAY(&ah->iniModesAdditional, modesIndex, regWrites); - if (AR_SREV_9300(ah)) + if (AR_SREV_9330(ah)) REG_WRITE_ARRAY(&ah->iniModesAdditional, 1, regWrites); if (AR_SREV_9340(ah) && !ah->is_clk_25mhz) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6530694a59a..722967b86cf 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2303,6 +2303,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop) mutex_lock(&sc->mutex); cancel_delayed_work_sync(&sc->tx_complete_work); + if (ah->ah_flags & AH_UNPLUGGED) { + ath_dbg(common, ATH_DBG_ANY, "Device has been unplugged!\n"); + mutex_unlock(&sc->mutex); + return; + } + if (sc->sc_flags & SC_OP_INVALID) { ath_dbg(common, ATH_DBG_ANY, "Device not present\n"); mutex_unlock(&sc->mutex); diff --git 
a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 9a4850154fb..4c21f8cbdeb 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -205,14 +205,22 @@ static void ath_rx_remove_buffer(struct ath_softc *sc, static void ath_rx_edma_cleanup(struct ath_softc *sc) { + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); struct ath_buf *bf; ath_rx_remove_buffer(sc, ATH9K_RX_QUEUE_LP); ath_rx_remove_buffer(sc, ATH9K_RX_QUEUE_HP); list_for_each_entry(bf, &sc->rx.rxbuf, list) { - if (bf->bf_mpdu) + if (bf->bf_mpdu) { + dma_unmap_single(sc->dev, bf->bf_buf_addr, + common->rx_bufsize, + DMA_BIDIRECTIONAL); dev_kfree_skb_any(bf->bf_mpdu); + bf->bf_buf_addr = 0; + bf->bf_mpdu = NULL; + } } INIT_LIST_HEAD(&sc->rx.rxbuf); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 26f1ab840cc..e293a7921bf 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -1632,7 +1632,8 @@ static void handle_irq_beacon(struct b43_wldev *dev) u32 cmd, beacon0_valid, beacon1_valid; if (!b43_is_mode(wl, NL80211_IFTYPE_AP) && - !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) + !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT) && + !b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) return; /* This is the bottom half of the asynchronous beacon update. 
*/ diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 3774dd03474..ef9ad79d1bf 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1903,15 +1903,17 @@ static void ipw2100_down(struct ipw2100_priv *priv) static int ipw2100_net_init(struct net_device *dev) { struct ipw2100_priv *priv = libipw_priv(dev); + + return ipw2100_up(priv, 1); +} + +static int ipw2100_wdev_init(struct net_device *dev) +{ + struct ipw2100_priv *priv = libipw_priv(dev); const struct libipw_geo *geo = libipw_get_geo(priv->ieee); struct wireless_dev *wdev = &priv->ieee->wdev; - int ret; int i; - ret = ipw2100_up(priv, 1); - if (ret) - return ret; - memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN); /* fill-out priv->ieee->bg_band */ @@ -6350,9 +6352,13 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, "Error calling register_netdev.\n"); goto fail; } + registered = 1; + + err = ipw2100_wdev_init(dev); + if (err) + goto fail; mutex_lock(&priv->action_mutex); - registered = 1; IPW_DEBUG_INFO("%s: Bound to %s\n", dev->name, pci_name(pci_dev)); @@ -6389,7 +6395,8 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, fail_unlock: mutex_unlock(&priv->action_mutex); - + wiphy_unregister(priv->ieee->wdev.wiphy); + kfree(priv->ieee->bg_band.channels); fail: if (dev) { if (registered) diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 87813c33bdc..4ffebede5e0 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -11425,16 +11425,23 @@ static void ipw_bg_down(struct work_struct *work) /* Called by register_netdev() */ static int ipw_net_init(struct net_device *dev) { + int rc = 0; + struct ipw_priv *priv = libipw_priv(dev); + + mutex_lock(&priv->mutex); + if (ipw_up(priv)) + rc = -EIO; + mutex_unlock(&priv->mutex); + + return rc; +} + +static int ipw_wdev_init(struct net_device *dev) +{ int i, rc 
= 0; struct ipw_priv *priv = libipw_priv(dev); const struct libipw_geo *geo = libipw_get_geo(priv->ieee); struct wireless_dev *wdev = &priv->ieee->wdev; - mutex_lock(&priv->mutex); - - if (ipw_up(priv)) { - rc = -EIO; - goto out; - } memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN); @@ -11519,13 +11526,9 @@ static int ipw_net_init(struct net_device *dev) set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev); /* With that information in place, we can now register the wiphy... */ - if (wiphy_register(wdev->wiphy)) { + if (wiphy_register(wdev->wiphy)) rc = -EIO; - goto out; - } - out: - mutex_unlock(&priv->mutex); return rc; } @@ -11832,14 +11835,22 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev, goto out_remove_sysfs; } + err = ipw_wdev_init(net_dev); + if (err) { + IPW_ERROR("failed to register wireless device\n"); + goto out_unregister_netdev; + } + #ifdef CONFIG_IPW2200_PROMISCUOUS if (rtap_iface) { err = ipw_prom_alloc(priv); if (err) { IPW_ERROR("Failed to register promiscuous network " "device (error %d).\n", err); - unregister_netdev(priv->net_dev); - goto out_remove_sysfs; + wiphy_unregister(priv->ieee->wdev.wiphy); + kfree(priv->ieee->a_band.channels); + kfree(priv->ieee->bg_band.channels); + goto out_unregister_netdev; } } #endif @@ -11851,6 +11862,8 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev, return 0; + out_unregister_netdev: + unregister_netdev(priv->net_dev); out_remove_sysfs: sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group); out_release_irq: diff --git a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c index 977bd2477c6..164bcae821f 100644 --- a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c @@ -822,12 +822,15 @@ static void iwl3945_rs_get_rate(void *priv_r, struct ieee80211_sta *sta, out: - rs_sta->last_txrate_idx = index; - if (sband->band == IEEE80211_BAND_5GHZ) - info->control.rates[0].idx = rs_sta->last_txrate_idx - - 
IWL_FIRST_OFDM_RATE; - else + if (sband->band == IEEE80211_BAND_5GHZ) { + if (WARN_ON_ONCE(index < IWL_FIRST_OFDM_RATE)) + index = IWL_FIRST_OFDM_RATE; + rs_sta->last_txrate_idx = index; + info->control.rates[0].idx = index - IWL_FIRST_OFDM_RATE; + } else { + rs_sta->last_txrate_idx = index; info->control.rates[0].idx = rs_sta->last_txrate_idx; + } IWL_DEBUG_RATE(priv, "leave: %d\n", index); } diff --git a/drivers/net/wireless/iwlegacy/iwl-core.c b/drivers/net/wireless/iwlegacy/iwl-core.c index 35cd2537e7f..e5971fe9d16 100644 --- a/drivers/net/wireless/iwlegacy/iwl-core.c +++ b/drivers/net/wireless/iwlegacy/iwl-core.c @@ -937,7 +937,7 @@ void iwl_legacy_irq_handle_error(struct iwl_priv *priv) &priv->contexts[IWL_RXON_CTX_BSS]); #endif - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); /* Keep the restart process from trying to send host * commands by clearing the INIT status bit */ @@ -1746,7 +1746,7 @@ int iwl_legacy_force_reset(struct iwl_priv *priv, bool external) /* Set the FW error flag -- cleared on iwl_down */ set_bit(STATUS_FW_ERROR, &priv->status); - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); /* * Keep the restart process from trying to send host * commands by clearing the INIT status bit diff --git a/drivers/net/wireless/iwlegacy/iwl-hcmd.c b/drivers/net/wireless/iwlegacy/iwl-hcmd.c index 62b4b09122c..ce1fc9feb61 100644 --- a/drivers/net/wireless/iwlegacy/iwl-hcmd.c +++ b/drivers/net/wireless/iwlegacy/iwl-hcmd.c @@ -167,7 +167,7 @@ int iwl_legacy_send_cmd_sync(struct iwl_priv *priv, struct iwl_host_cmd *cmd) goto out; } - ret = wait_event_interruptible_timeout(priv->wait_command_queue, + ret = wait_event_timeout(priv->wait_command_queue, !test_bit(STATUS_HCMD_ACTIVE, &priv->status), HOST_COMPLETE_TIMEOUT); if (!ret) { diff --git a/drivers/net/wireless/iwlegacy/iwl-tx.c b/drivers/net/wireless/iwlegacy/iwl-tx.c index 4fff995c6f3..ef9e268bf8a 100644 --- 
a/drivers/net/wireless/iwlegacy/iwl-tx.c +++ b/drivers/net/wireless/iwlegacy/iwl-tx.c @@ -625,6 +625,8 @@ iwl_legacy_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb) cmd = txq->cmd[cmd_index]; meta = &txq->meta[cmd_index]; + txq->time_stamp = jiffies; + pci_unmap_single(priv->pci_dev, dma_unmap_addr(meta, mapping), dma_unmap_len(meta, len), @@ -645,7 +647,7 @@ iwl_legacy_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb) clear_bit(STATUS_HCMD_ACTIVE, &priv->status); IWL_DEBUG_INFO(priv, "Clearing HCMD_ACTIVE for command %s\n", iwl_legacy_get_cmd_string(cmd->hdr.cmd)); - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); } /* Mark as unmapped */ diff --git a/drivers/net/wireless/iwlegacy/iwl3945-base.c b/drivers/net/wireless/iwlegacy/iwl3945-base.c index 795826a014e..66ee15629a7 100644 --- a/drivers/net/wireless/iwlegacy/iwl3945-base.c +++ b/drivers/net/wireless/iwlegacy/iwl3945-base.c @@ -841,7 +841,7 @@ static void iwl3945_rx_card_state_notif(struct iwl_priv *priv, wiphy_rfkill_set_hw_state(priv->hw->wiphy, test_bit(STATUS_RF_KILL_HW, &priv->status)); else - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); } /** @@ -2269,7 +2269,7 @@ static void iwl3945_alive_start(struct iwl_priv *priv) iwl3945_reg_txpower_periodic(priv); IWL_DEBUG_INFO(priv, "ALIVE processing complete.\n"); - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); return; @@ -2300,7 +2300,7 @@ static void __iwl3945_down(struct iwl_priv *priv) iwl_legacy_clear_driver_stations(priv); /* Unblock any waiting calls */ - wake_up_interruptible_all(&priv->wait_command_queue); + wake_up_all(&priv->wait_command_queue); /* Wipe out the EXIT_PENDING status bit if we are not actually * exiting the module */ @@ -2853,7 +2853,7 @@ static int iwl3945_mac_start(struct ieee80211_hw *hw) /* Wait for START_ALIVE from ucode. 
Otherwise callbacks from * mac80211 will not be run successfully. */ - ret = wait_event_interruptible_timeout(priv->wait_command_queue, + ret = wait_event_timeout(priv->wait_command_queue, test_bit(STATUS_READY, &priv->status), UCODE_READY_TIMEOUT); if (!ret) { diff --git a/drivers/net/wireless/iwlegacy/iwl4965-base.c b/drivers/net/wireless/iwlegacy/iwl4965-base.c index 14334668034..aa0c2539761 100644 --- a/drivers/net/wireless/iwlegacy/iwl4965-base.c +++ b/drivers/net/wireless/iwlegacy/iwl4965-base.c @@ -576,7 +576,7 @@ static void iwl4965_rx_card_state_notif(struct iwl_priv *priv, wiphy_rfkill_set_hw_state(priv->hw->wiphy, test_bit(STATUS_RF_KILL_HW, &priv->status)); else - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); } /** @@ -926,7 +926,7 @@ static void iwl4965_irq_tasklet(struct iwl_priv *priv) handled |= CSR_INT_BIT_FH_TX; /* Wake up uCode load routine, now that load is complete */ priv->ucode_write_complete = 1; - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); } if (inta & ~handled) { @@ -1795,7 +1795,7 @@ static void iwl4965_alive_start(struct iwl_priv *priv) iwl4965_rf_kill_ct_config(priv); IWL_DEBUG_INFO(priv, "ALIVE processing complete.\n"); - wake_up_interruptible(&priv->wait_command_queue); + wake_up(&priv->wait_command_queue); iwl_legacy_power_update_mode(priv, true); IWL_DEBUG_INFO(priv, "Updated power mode\n"); @@ -1828,7 +1828,7 @@ static void __iwl4965_down(struct iwl_priv *priv) iwl_legacy_clear_driver_stations(priv); /* Unblock any waiting calls */ - wake_up_interruptible_all(&priv->wait_command_queue); + wake_up_all(&priv->wait_command_queue); /* Wipe out the EXIT_PENDING status bit if we are not actually * exiting the module */ @@ -2266,7 +2266,7 @@ int iwl4965_mac_start(struct ieee80211_hw *hw) /* Wait for START_ALIVE from Run Time ucode. Otherwise callbacks from * mac80211 will not be run successfully. 
*/ - ret = wait_event_interruptible_timeout(priv->wait_command_queue, + ret = wait_event_timeout(priv->wait_command_queue, test_bit(STATUS_READY, &priv->status), UCODE_READY_TIMEOUT); if (!ret) { diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c index a895a099d08..56211006a18 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c @@ -167,7 +167,7 @@ static int iwlagn_set_temperature_offset_calib(struct iwl_priv *priv) memset(&cmd, 0, sizeof(cmd)); iwl_set_calib_hdr(&cmd.hdr, IWL_PHY_CALIBRATE_TEMP_OFFSET_CMD); - memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(offset_calib)); + memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(*offset_calib)); if (!(cmd.radio_sensor_offset)) cmd.radio_sensor_offset = DEFAULT_RADIO_SENSOR_OFFSET; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index b0ae4de7f08..f9c3cd95d61 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2140,7 +2140,12 @@ static int iwl_mac_setup_register(struct iwl_priv *priv, IEEE80211_HW_SPECTRUM_MGMT | IEEE80211_HW_REPORTS_TX_ACK_STATUS; + /* + * Including the following line will crash some AP's. This + * workaround removes the stimulus which causes the crash until + * the AP software can be fixed. 
hw->max_tx_aggregation_subframes = LINK_QUAL_AGG_FRAME_LIMIT_DEF; + */ hw->flags |= IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS; diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index dd6937e9705..77e528f5db8 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -405,31 +405,33 @@ int iwl_mac_hw_scan(struct ieee80211_hw *hw, mutex_lock(&priv->mutex); - if (test_bit(STATUS_SCANNING, &priv->status) && - priv->scan_type != IWL_SCAN_NORMAL) { - IWL_DEBUG_SCAN(priv, "Scan already in progress.\n"); - ret = -EAGAIN; - goto out_unlock; - } - - /* mac80211 will only ask for one band at a time */ - priv->scan_request = req; - priv->scan_vif = vif; - /* * If an internal scan is in progress, just set * up the scan_request as per above. */ if (priv->scan_type != IWL_SCAN_NORMAL) { - IWL_DEBUG_SCAN(priv, "SCAN request during internal scan\n"); + IWL_DEBUG_SCAN(priv, + "SCAN request during internal scan - defer\n"); + priv->scan_request = req; + priv->scan_vif = vif; ret = 0; - } else + } else { + priv->scan_request = req; + priv->scan_vif = vif; + /* + * mac80211 will only ask for one band at a time + * so using channels[0] here is ok + */ ret = iwl_scan_initiate(priv, vif, IWL_SCAN_NORMAL, req->channels[0]->band); + if (ret) { + priv->scan_request = NULL; + priv->scan_vif = NULL; + } + } IWL_DEBUG_MAC80211(priv, "leave\n"); -out_unlock: mutex_unlock(&priv->mutex); return ret; diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c index a6b2b1db0b1..222d410c586 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c @@ -771,6 +771,8 @@ void iwl_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb) cmd = txq->cmd[cmd_index]; meta = &txq->meta[cmd_index]; + txq->time_stamp = jiffies; + iwlagn_unmap_tfd(priv, meta, &txq->tfds[index], 
DMA_BIDIRECTIONAL); /* Input error checking is done when commands are added to queue. */ diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index ef67f6786a8..0019dfd8fb0 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -3697,14 +3697,15 @@ static void rt2800_efuse_read(struct rt2x00_dev *rt2x00dev, unsigned int i) rt2800_regbusy_read(rt2x00dev, EFUSE_CTRL, EFUSE_CTRL_KICK, &reg); /* Apparently the data is read from end to start */ - rt2800_register_read_lock(rt2x00dev, EFUSE_DATA3, - (u32 *)&rt2x00dev->eeprom[i]); - rt2800_register_read_lock(rt2x00dev, EFUSE_DATA2, - (u32 *)&rt2x00dev->eeprom[i + 2]); - rt2800_register_read_lock(rt2x00dev, EFUSE_DATA1, - (u32 *)&rt2x00dev->eeprom[i + 4]); - rt2800_register_read_lock(rt2x00dev, EFUSE_DATA0, - (u32 *)&rt2x00dev->eeprom[i + 6]); + rt2800_register_read_lock(rt2x00dev, EFUSE_DATA3, &reg); + /* The returned value is in CPU order, but eeprom is le */ + rt2x00dev->eeprom[i] = cpu_to_le32(reg); + rt2800_register_read_lock(rt2x00dev, EFUSE_DATA2, &reg); + *(u32 *)&rt2x00dev->eeprom[i + 2] = cpu_to_le32(reg); + rt2800_register_read_lock(rt2x00dev, EFUSE_DATA1, &reg); + *(u32 *)&rt2x00dev->eeprom[i + 4] = cpu_to_le32(reg); + rt2800_register_read_lock(rt2x00dev, EFUSE_DATA0, &reg); + *(u32 *)&rt2x00dev->eeprom[i + 6] = cpu_to_le32(reg); mutex_unlock(&rt2x00dev->csr_mutex); } @@ -3870,19 +3871,23 @@ int rt2800_init_eeprom(struct rt2x00_dev *rt2x00dev) return -ENODEV; } - if (!rt2x00_rf(rt2x00dev, RF2820) && - !rt2x00_rf(rt2x00dev, RF2850) && - !rt2x00_rf(rt2x00dev, RF2720) && - !rt2x00_rf(rt2x00dev, RF2750) && - !rt2x00_rf(rt2x00dev, RF3020) && - !rt2x00_rf(rt2x00dev, RF2020) && - !rt2x00_rf(rt2x00dev, RF3021) && - !rt2x00_rf(rt2x00dev, RF3022) && - !rt2x00_rf(rt2x00dev, RF3052) && - !rt2x00_rf(rt2x00dev, RF3320) && - !rt2x00_rf(rt2x00dev, RF5370) && - !rt2x00_rf(rt2x00dev, RF5390)) { - ERROR(rt2x00dev, "Invalid RF chipset detected.\n"); + switch
(rt2x00dev->chip.rf) { + case RF2820: + case RF2850: + case RF2720: + case RF2750: + case RF3020: + case RF2020: + case RF3021: + case RF3022: + case RF3052: + case RF3320: + case RF5370: + case RF5390: + break; + default: + ERROR(rt2x00dev, "Invalid RF chipset 0x%x detected.\n", + rt2x00dev->chip.rf); return -ENODEV; } diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 1bdc1aa305c..04c4e9eb6ee 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -610,6 +610,11 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, mac->link_state = MAC80211_NOLINK; memset(mac->bssid, 0, 6); + + /* reset sec info */ + rtl_cam_reset_sec_info(hw); + + rtl_cam_reset_all_entry(hw); mac->vendor = PEER_UNKNOWN; RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG, @@ -1063,6 +1068,9 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, *or clear all entry here. */ rtl_cam_delete_one_entry(hw, mac_addr, key_idx); + + rtl_cam_reset_sec_info(hw); + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index 906e7aa55bc..3e52a549622 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -549,15 +549,16 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw, (tcb_desc->rts_use_shortpreamble ? 1 : 0) : (tcb_desc->rts_use_shortgi ? 
1 : 0))); if (mac->bw_40) { - if (tcb_desc->packet_bw) { + if (rate_flag & IEEE80211_TX_RC_DUP_DATA) { SET_TX_DESC_DATA_BW(txdesc, 1); SET_TX_DESC_DATA_SC(txdesc, 3); + } else if(rate_flag & IEEE80211_TX_RC_40_MHZ_WIDTH){ + SET_TX_DESC_DATA_BW(txdesc, 1); + SET_TX_DESC_DATA_SC(txdesc, mac->cur_40_prime_sc); } else { SET_TX_DESC_DATA_BW(txdesc, 0); - if (rate_flag & IEEE80211_TX_RC_DUP_DATA) - SET_TX_DESC_DATA_SC(txdesc, - mac->cur_40_prime_sc); - } + SET_TX_DESC_DATA_SC(txdesc, 0); + } } else { SET_TX_DESC_DATA_BW(txdesc, 0); SET_TX_DESC_DATA_SC(txdesc, 0); diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index 8b1cef0ffde..4bf3cf457ef 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -863,6 +863,7 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, struct sk_buff *skb, u8 tid = 0; u16 seq_number = 0; + memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); if (ieee80211_is_auth(fc)) { RT_TRACE(rtlpriv, COMP_SEND, DBG_DMESG, ("MAC80211_LINKING\n")); rtl_ips_nic_on(hw); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 0ca86f9ec4e..182562952c7 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -327,12 +327,12 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, xenvif_get(vif); rtnl_lock(); - if (netif_running(vif->dev)) - xenvif_up(vif); if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) dev_set_mtu(vif->dev, ETH_DATA_LEN); netdev_update_features(vif->dev); netif_carrier_on(vif->dev); + if (netif_running(vif->dev)) + xenvif_up(vif); rtnl_unlock(); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4e84fd4a431..e9651f0a881 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = 
DEFAULT_HOTPLUG_MEM_SIZE; -enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF; /* * The default CLS is used if arch didn't set CLS explicitly and not @@ -3568,10 +3568,14 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pcie_bus_tune_off", 17)) { + pcie_bus_config = PCIE_BUS_TUNE_OFF; } else if (!strncmp(str, "pcie_bus_safe", 13)) { pcie_bus_config = PCIE_BUS_SAFE; } else if (!strncmp(str, "pcie_bus_perf", 13)) { pcie_bus_config = PCIE_BUS_PERFORMANCE; + } else if (!strncmp(str, "pcie_bus_peer2peer", 18)) { + pcie_bus_config = PCIE_BUS_PEER2PEER; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b1187ff31d8..6ab6bd3df4b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1351,7 +1351,8 @@ static int pcie_find_smpss(struct pci_dev *dev, void *data) * will occur as normal. */ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || - dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + (dev->bus->self && + dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))) *smpss = 0; if (*smpss > dev->pcie_mpss) @@ -1457,12 +1458,24 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) */ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) { - u8 smpss = mpss; + u8 smpss; if (!pci_is_pcie(bus->self)) return; + if (pcie_bus_config == PCIE_BUS_TUNE_OFF) + return; + + /* FIXME - Peer to peer DMA is possible, though the endpoint would need + * to be aware to the MPS of the destination. To work around this, + * simply force the MPS of the entire system to the smallest possible. 
+ */ + if (pcie_bus_config == PCIE_BUS_PEER2PEER) + smpss = 0; + if (pcie_bus_config == PCIE_BUS_SAFE) { + smpss = mpss; + pcie_find_smpss(bus->self, &smpss); pci_walk_bus(bus, pcie_find_smpss, &smpss); } diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 2dd3c016327..d93a9608b1f 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -35,6 +35,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/rtc.h> +#include <linux/sched.h> #include <linux/workqueue.h> /* DryIce Register Definitions */ diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 4e7c04e773e..7639ab906f0 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -51,6 +51,27 @@ static enum s3c_cpu_type s3c_rtc_cpu_type; static DEFINE_SPINLOCK(s3c_rtc_pie_lock); +static void s3c_rtc_alarm_clk_enable(bool enable) +{ + static DEFINE_SPINLOCK(s3c_rtc_alarm_clk_lock); + static bool alarm_clk_enabled; + unsigned long irq_flags; + + spin_lock_irqsave(&s3c_rtc_alarm_clk_lock, irq_flags); + if (enable) { + if (!alarm_clk_enabled) { + clk_enable(rtc_clk); + alarm_clk_enabled = true; + } + } else { + if (alarm_clk_enabled) { + clk_disable(rtc_clk); + alarm_clk_enabled = false; + } + } + spin_unlock_irqrestore(&s3c_rtc_alarm_clk_lock, irq_flags); +} + /* IRQ Handlers */ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id) @@ -64,6 +85,9 @@ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id) writeb(S3C2410_INTP_ALM, s3c_rtc_base + S3C2410_INTP); clk_disable(rtc_clk); + + s3c_rtc_alarm_clk_enable(false); + return IRQ_HANDLED; } @@ -97,6 +121,8 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) writeb(tmp, s3c_rtc_base + S3C2410_RTCALM); clk_disable(rtc_clk); + s3c_rtc_alarm_clk_enable(enabled); + return 0; } diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index cbde448f994..eb3140ee821 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -654,8 +654,8 @@ static struct 
io_subchannel_private console_priv; static int console_subchannel_in_use; /* - * Use tpi to get a pending interrupt, call the interrupt handler and - * return a pointer to the subchannel structure. + * Use cio_tpi to get a pending interrupt and call the interrupt handler. + * Return non-zero if an interrupt was processed, zero otherwise. */ static int cio_tpi(void) { @@ -667,6 +667,10 @@ static int cio_tpi(void) tpi_info = (struct tpi_info *)&S390_lowcore.subchannel_id; if (tpi(NULL) != 1) return 0; + if (tpi_info->adapter_IO) { + do_adapter_IO(tpi_info->isc); + return 1; + } irb = (struct irb *)&S390_lowcore.irb; /* Store interrupt response block to lowcore. */ if (tsch(tpi_info->schid, irb) != 0) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index b7bd5b0cc7a..3868ab2397c 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1800,10 +1800,12 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: twa_free_request_id(tw_dev, request_id); + twa_unmap_scsi_data(tw_dev, request_id); break; case 1: tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); + twa_unmap_scsi_data(tw_dev, request_id); SCpnt->result = (DID_ERROR << 16); done(SCpnt); retval = 0; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8d9dae89f06..3878b739508 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -837,6 +837,7 @@ config SCSI_ISCI # (temporary): known alpha quality driver depends on EXPERIMENTAL select SCSI_SAS_LIBSAS + select SCSI_SAS_HOST_SMP ---help--- This driver supports the 6Gb/s SAS capabilities of the storage control unit found in the Intel(R) C600 series chipset. 
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 3c08f5352b2..6153a66a8a3 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -88,7 +88,7 @@ obj-$(CONFIG_SCSI_QLOGIC_FAS) += qlogicfas408.o qlogicfas.o obj-$(CONFIG_PCMCIA_QLOGIC) += qlogicfas408.o obj-$(CONFIG_SCSI_QLOGIC_1280) += qla1280.o obj-$(CONFIG_SCSI_QLA_FC) += qla2xxx/ -obj-$(CONFIG_SCSI_QLA_ISCSI) += qla4xxx/ +obj-$(CONFIG_SCSI_QLA_ISCSI) += libiscsi.o qla4xxx/ obj-$(CONFIG_SCSI_LPFC) += lpfc/ obj-$(CONFIG_SCSI_BFA_FC) += bfa/ obj-$(CONFIG_SCSI_PAS16) += pas16.o diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e7d0d47b918..e5f2d7d9002 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1283,6 +1283,8 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) kfree(aac->queues); aac->queues = NULL; free_irq(aac->pdev->irq, aac); + if (aac->msi) + pci_disable_msi(aac->pdev); kfree(aac->fsa_dev); aac->fsa_dev = NULL; quirks = aac_get_driver_ident(index)->quirks; diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 9ae80cd5953..dba72a4e6a1 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -563,7 +563,7 @@ int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn, nopout_wqe->itt = ((u16)task->itt | (ISCSI_TASK_TYPE_MPATH << ISCSI_TMF_REQUEST_TYPE_SHIFT)); - nopout_wqe->ttt = nopout_hdr->ttt; + nopout_wqe->ttt = be32_to_cpu(nopout_hdr->ttt); nopout_wqe->flags = 0; if (!unsol) nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION; diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index bd22041e278..f5864485033 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -913,7 +913,7 @@ static void l2t_put(struct cxgbi_sock *csk) struct t3cdev *t3dev = (struct t3cdev *)csk->cdev->lldev; if (csk->l2t) { - l2t_release(L2DATA(t3dev), csk->l2t); + l2t_release(t3dev, csk->l2t); 
csk->l2t = NULL; cxgbi_sock_put(csk); } diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ba710e350ac..5d0e9a24ae9 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -432,6 +432,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) u8 flogi_maddr[ETH_ALEN]; const struct net_device_ops *ops; + rtnl_lock(); + /* * Don't listen for Ethernet packets anymore. * synchronize_net() ensures that the packet handlers are not running @@ -461,6 +463,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) " specific feature for LLD.\n"); } + rtnl_unlock(); + /* Release the self-reference taken during fcoe_interface_create() */ fcoe_interface_put(fcoe); } @@ -1951,11 +1955,8 @@ static void fcoe_destroy_work(struct work_struct *work) fcoe_if_destroy(port->lport); /* Do not tear down the fcoe interface for NPIV port */ - if (!npiv) { - rtnl_lock(); + if (!npiv) fcoe_interface_cleanup(fcoe); - rtnl_unlock(); - } mutex_unlock(&fcoe_config_mutex); } @@ -2009,8 +2010,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) printk(KERN_ERR "fcoe: Failed to create interface (%s)\n", netdev->name); rc = -EIO; + rtnl_unlock(); fcoe_interface_cleanup(fcoe); - goto out_nodev; + goto out_nortnl; } /* Make this the "master" N_Port */ @@ -2027,6 +2029,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) out_nodev: rtnl_unlock(); +out_nortnl: mutex_unlock(&fcoe_config_mutex); return rc; } diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index ec61bdb833a..b200b736b00 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -676,6 +676,16 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno, BUG_ON(entry < 0 || entry >= HPSA_MAX_SCSI_DEVS_PER_HBA); removed[*nremoved] = h->dev[entry]; (*nremoved)++; + + /* + * New physical devices won't have target/lun assigned yet + * so we need to preserve the values in the slot we are replacing. 
+ */ + if (new_entry->target == -1) { + new_entry->target = h->dev[entry]->target; + new_entry->lun = h->dev[entry]->lun; + } + h->dev[entry] = new_entry; added[*nadded] = new_entry; (*nadded)++; @@ -1548,10 +1558,17 @@ static inline void hpsa_set_bus_target_lun(struct hpsa_scsi_dev_t *device, } static int hpsa_update_device_info(struct ctlr_info *h, - unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device) + unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device, + unsigned char *is_OBDR_device) { -#define OBDR_TAPE_INQ_SIZE 49 + +#define OBDR_SIG_OFFSET 43 +#define OBDR_TAPE_SIG "$DR-10" +#define OBDR_SIG_LEN (sizeof(OBDR_TAPE_SIG) - 1) +#define OBDR_TAPE_INQ_SIZE (OBDR_SIG_OFFSET + OBDR_SIG_LEN) + unsigned char *inq_buff; + unsigned char *obdr_sig; inq_buff = kzalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); if (!inq_buff) @@ -1583,6 +1600,16 @@ static int hpsa_update_device_info(struct ctlr_info *h, else this_device->raid_level = RAID_UNKNOWN; + if (is_OBDR_device) { + /* See if this is a One-Button-Disaster-Recovery device + * by looking for "$DR-10" at offset 43 in inquiry data. 
+ */ + obdr_sig = &inq_buff[OBDR_SIG_OFFSET]; + *is_OBDR_device = (this_device->devtype == TYPE_ROM && + strncmp(obdr_sig, OBDR_TAPE_SIG, + OBDR_SIG_LEN) == 0); + } + kfree(inq_buff); return 0; @@ -1716,7 +1743,7 @@ static int add_msa2xxx_enclosure_device(struct ctlr_info *h, return 0; } - if (hpsa_update_device_info(h, scsi3addr, this_device)) + if (hpsa_update_device_info(h, scsi3addr, this_device, NULL)) return 0; (*nmsa2xxx_enclosures)++; hpsa_set_bus_target_lun(this_device, bus, target, 0); @@ -1808,7 +1835,6 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) */ struct ReportLUNdata *physdev_list = NULL; struct ReportLUNdata *logdev_list = NULL; - unsigned char *inq_buff = NULL; u32 nphysicals = 0; u32 nlogicals = 0; u32 ndev_allocated = 0; @@ -1824,11 +1850,9 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) GFP_KERNEL); physdev_list = kzalloc(reportlunsize, GFP_KERNEL); logdev_list = kzalloc(reportlunsize, GFP_KERNEL); - inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL); - if (!currentsd || !physdev_list || !logdev_list || - !inq_buff || !tmpdevice) { + if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) { dev_err(&h->pdev->dev, "out of memory\n"); goto out; } @@ -1863,7 +1887,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) /* adjust our table of devices */ nmsa2xxx_enclosures = 0; for (i = 0; i < nphysicals + nlogicals + 1; i++) { - u8 *lunaddrbytes; + u8 *lunaddrbytes, is_OBDR = 0; /* Figure out where the LUN ID info is coming from */ lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position, @@ -1874,7 +1898,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) continue; /* Get device type, vendor, model, device id */ - if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice)) + if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice, + &is_OBDR)) continue; /* skip it if we can't talk to it. 
*/ figure_bus_target_lun(h, lunaddrbytes, &bus, &target, &lun, tmpdevice); @@ -1898,7 +1923,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) hpsa_set_bus_target_lun(this_device, bus, target, lun); switch (this_device->devtype) { - case TYPE_ROM: { + case TYPE_ROM: /* We don't *really* support actual CD-ROM devices, * just "One Button Disaster Recovery" tape drive * which temporarily pretends to be a CD-ROM drive. @@ -1906,15 +1931,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) * device by checking for "$DR-10" in bytes 43-48 of * the inquiry data. */ - char obdr_sig[7]; -#define OBDR_TAPE_SIG "$DR-10" - strncpy(obdr_sig, &inq_buff[43], 6); - obdr_sig[6] = '\0'; - if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0) - /* Not OBDR device, ignore it. */ - break; - } - ncurrent++; + if (is_OBDR) + ncurrent++; break; case TYPE_DISK: if (i < nphysicals) @@ -1947,7 +1965,6 @@ out: for (i = 0; i < ndev_allocated; i++) kfree(currentsd[i]); kfree(currentsd); - kfree(inq_buff); kfree(physdev_list); kfree(logdev_list); } diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 26072f1e985..6981b773a88 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -531,6 +531,9 @@ static void sci_controller_process_completions(struct isci_host *ihost) break; case SCU_COMPLETION_TYPE_EVENT: + sci_controller_event_completion(ihost, ent); + break; + case SCU_COMPLETION_TYPE_NOTIFY: { event_cycle ^= ((event_get+1) & SCU_MAX_EVENTS) << (SMU_COMPLETION_QUEUE_GET_EVENT_CYCLE_BIT_SHIFT - SCU_MAX_EVENTS_SHIFT); @@ -1091,6 +1094,7 @@ static void isci_host_completion_routine(unsigned long data) struct isci_request *request; struct isci_request *next_request; struct sas_task *task; + u16 active; INIT_LIST_HEAD(&completed_request_list); INIT_LIST_HEAD(&errored_request_list); @@ -1181,6 +1185,13 @@ static void isci_host_completion_routine(unsigned long data) } } + /* the coalesence timeout doubles at each encoding step, so 
+ * update it based on the ilog2 value of the outstanding requests + */ + active = isci_tci_active(ihost); + writel(SMU_ICC_GEN_VAL(NUMBER, active) | + SMU_ICC_GEN_VAL(TIMER, ISCI_COALESCE_BASE + ilog2(active)), + &ihost->smu_registers->interrupt_coalesce_control); } /** @@ -1471,7 +1482,7 @@ static void sci_controller_ready_state_enter(struct sci_base_state_machine *sm) struct isci_host *ihost = container_of(sm, typeof(*ihost), sm); /* set the default interrupt coalescence number and timeout value. */ - sci_controller_set_interrupt_coalescence(ihost, 0x10, 250); + sci_controller_set_interrupt_coalescence(ihost, 0, 0); } static void sci_controller_ready_state_exit(struct sci_base_state_machine *sm) diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 062101a39f7..9f33831a2f0 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -369,6 +369,9 @@ static inline struct isci_host *dev_to_ihost(struct domain_device *dev) #define ISCI_TAG_SEQ(tag) (((tag) >> 12) & (SCI_MAX_SEQ-1)) #define ISCI_TAG_TCI(tag) ((tag) & (SCI_MAX_IO_REQUESTS-1)) +/* interrupt coalescing baseline: 9 == 3 to 5us interrupt delay per command */ +#define ISCI_COALESCE_BASE 9 + /* expander attached sata devices require 3 rnc slots */ static inline int sci_remote_device_node_count(struct isci_remote_device *idev) { diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 61e0d09e2b5..29aa34efb0f 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -59,10 +59,19 @@ #include <linux/firmware.h> #include <linux/efi.h> #include <asm/string.h> +#include <scsi/scsi_host.h> #include "isci.h" #include "task.h" #include "probe_roms.h" +#define MAJ 1 +#define MIN 0 +#define BUILD 0 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." 
\ + __stringify(BUILD) + +MODULE_VERSION(DRV_VERSION); + static struct scsi_transport_template *isci_transport_template; static DEFINE_PCI_DEVICE_TABLE(isci_id_table) = { @@ -113,6 +122,22 @@ unsigned char max_concurr_spinup = 1; module_param(max_concurr_spinup, byte, 0); MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup"); +static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); + struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); + + return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); +} + +static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); + +struct device_attribute *isci_host_attrs[] = { + &dev_attr_isci_id, + NULL +}; + static struct scsi_host_template isci_sht = { .module = THIS_MODULE, @@ -138,6 +163,7 @@ static struct scsi_host_template isci_sht = { .slave_alloc = sas_slave_alloc, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, + .shost_attrs = isci_host_attrs, }; static struct sas_domain_function_template isci_transport_ops = { @@ -232,17 +258,6 @@ static int isci_register_sas_ha(struct isci_host *isci_host) return 0; } -static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); - struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); - struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); - - return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); -} - -static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); - static void isci_unregister(struct isci_host *isci_host) { struct Scsi_Host *shost; @@ -251,7 +266,6 @@ static void isci_unregister(struct isci_host *isci_host) return; shost = isci_host->shost; - device_remove_file(&shost->shost_dev, &dev_attr_isci_id); sas_unregister_ha(&isci_host->sas_ha); @@ -415,14 
+429,8 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) if (err) goto err_shost_remove; - err = device_create_file(&shost->shost_dev, &dev_attr_isci_id); - if (err) - goto err_unregister_ha; - return isci_host; - err_unregister_ha: - sas_unregister_ha(&(isci_host->sas_ha)); err_shost_remove: scsi_remove_host(shost); err_shost: @@ -540,7 +548,8 @@ static __init int isci_init(void) { int err; - pr_info("%s: Intel(R) C600 SAS Controller Driver\n", DRV_NAME); + pr_info("%s: Intel(R) C600 SAS Controller Driver - version %s\n", + DRV_NAME, DRV_VERSION); isci_transport_template = sas_domain_attach_transport(&isci_transport_ops); if (!isci_transport_template) diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c index 79313a7a235..430fc8ff014 100644 --- a/drivers/scsi/isci/phy.c +++ b/drivers/scsi/isci/phy.c @@ -104,6 +104,7 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, u32 parity_count = 0; u32 llctl, link_rate; u32 clksm_value = 0; + u32 sp_timeouts = 0; iphy->link_layer_registers = reg; @@ -211,6 +212,18 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, llctl |= SCU_SAS_LLCTL_GEN_VAL(MAX_LINK_RATE, link_rate); writel(llctl, &iphy->link_layer_registers->link_layer_control); + sp_timeouts = readl(&iphy->link_layer_registers->sas_phy_timeouts); + + /* Clear the default 0x36 (54us) RATE_CHANGE timeout value. */ + sp_timeouts &= ~SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0xFF); + + /* Set RATE_CHANGE timeout value to 0x3B (59us). This ensures SCU can + * lock with 3Gb drive when SCU max rate is set to 1.5Gb. 
+ */ + sp_timeouts |= SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0x3B); + + writel(sp_timeouts, &iphy->link_layer_registers->sas_phy_timeouts); + if (is_a2(ihost->pdev)) { /* Program the max ARB time for the PHY to 700us so we inter-operate with * the PMC expander which shuts down PHYs if the expander PHY generates too diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h index 9b266c7428e..00afc738bbe 100644 --- a/drivers/scsi/isci/registers.h +++ b/drivers/scsi/isci/registers.h @@ -1299,6 +1299,18 @@ struct scu_transport_layer_registers { #define SCU_AFE_XCVRCR_OFFSET 0x00DC #define SCU_AFE_LUTCR_OFFSET 0x00E0 +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_SHIFT (0UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_MASK (0x000000FFUL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_SHIFT (8UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_MASK (0x0000FF00UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_SHIFT (16UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_MASK (0x00FF0000UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_SHIFT (24UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_MASK (0xFF000000UL) + +#define SCU_SAS_PHYTOV_GEN_VAL(name, value) \ + SCU_GEN_VALUE(SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_##name, value) + #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_SHIFT (0) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_MASK (0x00000003) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_GEN1 (0) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index a46e07ac789..b5d3a8c4d32 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -732,12 +732,20 @@ sci_io_request_terminate(struct isci_request *ireq) sci_change_state(&ireq->sm, SCI_REQ_ABORTING); return SCI_SUCCESS; case SCI_REQ_TASK_WAIT_TC_RESP: + /* The task frame was already confirmed to have been + * sent by the SCU HW. 
Since the state machine is + * now only waiting for the task response itself, + * abort the request and complete it immediately + * and don't wait for the task response. + */ sci_change_state(&ireq->sm, SCI_REQ_ABORTING); sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); return SCI_SUCCESS; case SCI_REQ_ABORTING: - sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); - return SCI_SUCCESS; + /* If a request has a termination requested twice, return + * a failure indication, since HW confirmation of the first + * abort is still outstanding. + */ case SCI_REQ_COMPLETED: default: dev_warn(&ireq->owning_controller->pdev->dev, @@ -2399,22 +2407,19 @@ static void isci_task_save_for_upper_layer_completion( } } -static void isci_request_process_stp_response(struct sas_task *task, - void *response_buffer) +static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_fis *fis) { - struct dev_to_host_fis *d2h_reg_fis = response_buffer; struct task_status_struct *ts = &task->task_status; struct ata_task_resp *resp = (void *)&ts->buf[0]; - resp->frame_len = le16_to_cpu(*(__le16 *)(response_buffer + 6)); - memcpy(&resp->ending_fis[0], response_buffer + 16, 24); + resp->frame_len = sizeof(*fis); + memcpy(resp->ending_fis, fis, sizeof(*fis)); ts->buf_valid_size = sizeof(*resp); - /** - * If the device fault bit is set in the status register, then + /* If the device fault bit is set in the status register, then * set the sense data and return. 
*/ - if (d2h_reg_fis->status & ATA_DF) + if (fis->status & ATA_DF) ts->stat = SAS_PROTO_RESPONSE; else ts->stat = SAM_STAT_GOOD; @@ -2428,7 +2433,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost, { struct sas_task *task = isci_request_access_task(request); struct ssp_response_iu *resp_iu; - void *resp_buf; unsigned long task_flags; struct isci_remote_device *idev = isci_lookup_device(task->dev); enum service_response response = SAS_TASK_UNDELIVERED; @@ -2565,9 +2569,7 @@ static void isci_request_io_request_complete(struct isci_host *ihost, task); if (sas_protocol_ata(task->task_proto)) { - resp_buf = &request->stp.rsp; - isci_request_process_stp_response(task, - resp_buf); + isci_process_stp_response(task, &request->stp.rsp); } else if (SAS_PROTOCOL_SSP == task->task_proto) { /* crack the iu response buffer. */ diff --git a/drivers/scsi/isci/unsolicited_frame_control.c b/drivers/scsi/isci/unsolicited_frame_control.c index e9e1e2abacb..16f88ab939c 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.c +++ b/drivers/scsi/isci/unsolicited_frame_control.c @@ -72,7 +72,7 @@ int sci_unsolicited_frame_control_construct(struct isci_host *ihost) */ buf_len = SCU_MAX_UNSOLICITED_FRAMES * SCU_UNSOLICITED_FRAME_BUFFER_SIZE; header_len = SCU_MAX_UNSOLICITED_FRAMES * sizeof(struct scu_unsolicited_frame_header); - size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(dma_addr_t); + size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(uf_control->address_table.array[0]); /* * The Unsolicited Frame buffers are set at the start of the UF diff --git a/drivers/scsi/isci/unsolicited_frame_control.h b/drivers/scsi/isci/unsolicited_frame_control.h index 31cb9506f52..75d896686f5 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.h +++ b/drivers/scsi/isci/unsolicited_frame_control.h @@ -214,7 +214,7 @@ struct sci_uf_address_table_array { * starting address of the UF address table. * 64-bit pointers are required by the hardware. 
*/ - dma_addr_t *array; + u64 *array; /** * This field specifies the physical address location for the UF diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 01ff082dc34..d261e982a2f 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -494,6 +494,9 @@ static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp, */ error = lport->tt.frame_send(lport, fp); + if (fh->fh_type == FC_TYPE_BLS) + return error; + /* * Update the exchange and sequence flags, * assuming all frames for the sequence have been sent. @@ -575,42 +578,35 @@ static void fc_seq_set_resp(struct fc_seq *sp, } /** - * fc_seq_exch_abort() - Abort an exchange and sequence - * @req_sp: The sequence to be aborted + * fc_exch_abort_locked() - Abort an exchange + * @ep: The exchange to be aborted * @timer_msec: The period of time to wait before aborting * - * Generally called because of a timeout or an abort from the upper layer. + * Locking notes: Called with exch lock held + * + * Return value: 0 on success else error code */ -static int fc_seq_exch_abort(const struct fc_seq *req_sp, - unsigned int timer_msec) +static int fc_exch_abort_locked(struct fc_exch *ep, + unsigned int timer_msec) { struct fc_seq *sp; - struct fc_exch *ep; struct fc_frame *fp; int error; - ep = fc_seq_exch(req_sp); - - spin_lock_bh(&ep->ex_lock); if (ep->esb_stat & (ESB_ST_COMPLETE | ESB_ST_ABNORMAL) || - ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP)) { - spin_unlock_bh(&ep->ex_lock); + ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP)) return -ENXIO; - } /* * Send the abort on a new sequence if possible. 
*/ sp = fc_seq_start_next_locked(&ep->seq); - if (!sp) { - spin_unlock_bh(&ep->ex_lock); + if (!sp) return -ENOMEM; - } ep->esb_stat |= ESB_ST_SEQ_INIT | ESB_ST_ABNORMAL; if (timer_msec) fc_exch_timer_set_locked(ep, timer_msec); - spin_unlock_bh(&ep->ex_lock); /* * If not logged into the fabric, don't send ABTS but leave @@ -633,6 +629,28 @@ static int fc_seq_exch_abort(const struct fc_seq *req_sp, } /** + * fc_seq_exch_abort() - Abort an exchange and sequence + * @req_sp: The sequence to be aborted + * @timer_msec: The period of time to wait before aborting + * + * Generally called because of a timeout or an abort from the upper layer. + * + * Return value: 0 on success else error code + */ +static int fc_seq_exch_abort(const struct fc_seq *req_sp, + unsigned int timer_msec) +{ + struct fc_exch *ep; + int error; + + ep = fc_seq_exch(req_sp); + spin_lock_bh(&ep->ex_lock); + error = fc_exch_abort_locked(ep, timer_msec); + spin_unlock_bh(&ep->ex_lock); + return error; +} + +/** * fc_exch_timeout() - Handle exchange timer expiration * @work: The work_struct identifying the exchange that timed out */ @@ -1715,6 +1733,7 @@ static void fc_exch_reset(struct fc_exch *ep) int rc = 1; spin_lock_bh(&ep->ex_lock); + fc_exch_abort_locked(ep, 0); ep->state |= FC_EX_RST_CLEANUP; if (cancel_delayed_work(&ep->timeout_work)) atomic_dec(&ep->ex_refcnt); /* drop hold for timer */ @@ -1962,6 +1981,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport, struct fc_exch *ep; struct fc_seq *sp = NULL; struct fc_frame_header *fh; + struct fc_fcp_pkt *fsp = NULL; int rc = 1; ep = fc_exch_alloc(lport, fp); @@ -1984,8 +2004,10 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport, fc_exch_setup_hdr(ep, fp, ep->f_ctl); sp->cnt++; - if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) + if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) { + fsp = fr_fsp(fp); fc_fcp_ddp_setup(fr_fsp(fp), ep->xid); + } if 
(unlikely(lport->tt.frame_send(lport, fp))) goto err; @@ -1999,7 +2021,8 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport, spin_unlock_bh(&ep->ex_lock); return sp; err: - fc_fcp_ddp_done(fr_fsp(fp)); + if (fsp) + fc_fcp_ddp_done(fsp); rc = fc_exch_done_locked(ep); spin_unlock_bh(&ep->ex_lock); if (!rc) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index afb63c84314..4c41ee816f0 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -2019,6 +2019,11 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd) struct fc_fcp_internal *si; int rc = FAILED; unsigned long flags; + int rval; + + rval = fc_block_scsi_eh(sc_cmd); + if (rval) + return rval; lport = shost_priv(sc_cmd->device->host); if (lport->state != LPORT_ST_READY) @@ -2068,9 +2073,9 @@ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd) int rc = FAILED; int rval; - rval = fc_remote_port_chkready(rport); + rval = fc_block_scsi_eh(sc_cmd); if (rval) - goto out; + return rval; lport = shost_priv(sc_cmd->device->host); @@ -2116,6 +2121,8 @@ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd) FC_SCSI_DBG(lport, "Resetting host\n"); + fc_block_scsi_eh(sc_cmd); + lport->tt.lport_reset(lport); wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT; while (!fc_fcp_lport_queue_ready(lport) && time_before(jiffies, diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index e55ed9cf23f..628f347404f 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -88,6 +88,7 @@ */ #include <linux/timer.h> +#include <linux/delay.h> #include <linux/slab.h> #include <asm/unaligned.h> @@ -1029,8 +1030,16 @@ static void fc_lport_enter_reset(struct fc_lport *lport) FCH_EVT_LIPRESET, 0); fc_vports_linkchange(lport); fc_lport_reset_locked(lport); - if (lport->link_up) + if (lport->link_up) { + /* + * Wait upto resource allocation time out before + * doing re-login since incomplete FIP exchanged + * from last session may collide with exchanges + * in new 
session. + */ + msleep(lport->r_a_tov); fc_lport_enter_flogi(lport); + } } /** diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index f84084bba2f..16ad97df5ba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1721,7 +1721,7 @@ static int sas_find_bcast_dev(struct domain_device *dev, list_for_each_entry(ch, &ex->children, siblings) { if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { res = sas_find_bcast_dev(ch, src_dev); - if (src_dev) + if (*src_dev) return res; } } @@ -1769,10 +1769,12 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, sas_disable_routing(parent, phy->attached_sas_addr); } memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); - sas_port_delete_phy(phy->port, phy->phy); - if (phy->port->num_phys == 0) - sas_port_delete(phy->port); - phy->port = NULL; + if (phy->port) { + sas_port_delete_phy(phy->port, phy->phy); + if (phy->port->num_phys == 0) + sas_port_delete(phy->port); + phy->port = NULL; + } } static int sas_discover_bfs_by_root_level(struct domain_device *root, diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 7836eb01c7f..a31e05f3bfd 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1786,13 +1786,16 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable) fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN); } - if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) { + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) { + int prot = 0; vha->flags.difdix_supported = 1; ql_dbg(ql_dbg_user, vha, 0x7082, "Registered for DIF/DIX type 1 and 3 protection.\n"); + if (ql2xenabledif == 1) + prot = SHOST_DIX_TYPE0_PROTECTION; scsi_host_set_prot(vha->host, - SHOST_DIF_TYPE1_PROTECTION + prot | SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION diff --git 
a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 2155071f310..d79cd8a5f83 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -8,24 +8,24 @@ /* * Table for showing the current message id in use for particular level * Change this table for addition of log/debug messages. - * ----------------------------------------------------- - * | Level | Last Value Used | - * ----------------------------------------------------- - * | Module Init and Probe | 0x0116 | - * | Mailbox commands | 0x111e | - * | Device Discovery | 0x2083 | - * | Queue Command and IO tracing | 0x302e | - * | DPC Thread | 0x401c | - * | Async Events | 0x5059 | - * | Timer Routines | 0x600d | - * | User Space Interactions | 0x709c | - * | Task Management | 0x8043 | - * | AER/EEH | 0x900f | - * | Virtual Port | 0xa007 | - * | ISP82XX Specific | 0xb027 | - * | MultiQ | 0xc00b | - * | Misc | 0xd00b | - * ----------------------------------------------------- + * ---------------------------------------------------------------------- + * | Level | Last Value Used | Holes | + * ---------------------------------------------------------------------- + * | Module Init and Probe | 0x0116 | | + * | Mailbox commands | 0x1126 | | + * | Device Discovery | 0x2083 | | + * | Queue Command and IO tracing | 0x302e | 0x3008 | + * | DPC Thread | 0x401c | | + * | Async Events | 0x5059 | | + * | Timer Routines | 0x600d | | + * | User Space Interactions | 0x709d | | + * | Task Management | 0x8041 | | + * | AER/EEH | 0x900f | | + * | Virtual Port | 0xa007 | | + * | ISP82XX Specific | 0xb04f | | + * | MultiQ | 0xc00b | | + * | Misc | 0xd00b | | + * ---------------------------------------------------------------------- */ #include "qla_def.h" diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index cc5a79259d3..a03eaf40f37 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2529,6 +2529,7 @@ struct qla_hw_data { #define 
DT_ISP8021 BIT_14 #define DT_ISP_LAST (DT_ISP8021 << 1) +#define DT_T10_PI BIT_25 #define DT_IIDMA BIT_26 #define DT_FWI2 BIT_27 #define DT_ZIO_SUPPORTED BIT_28 @@ -2572,6 +2573,7 @@ struct qla_hw_data { #define IS_NOCACHE_VPD_TYPE(ha) (IS_QLA81XX(ha)) #define IS_ALOGIO_CAPABLE(ha) (IS_QLA23XX(ha) || IS_FWI2_CAPABLE(ha)) +#define IS_T10_PI_CAPABLE(ha) ((ha)->device_type & DT_T10_PI) #define IS_IIDMA_CAPABLE(ha) ((ha)->device_type & DT_IIDMA) #define IS_FWI2_CAPABLE(ha) ((ha)->device_type & DT_FWI2) #define IS_ZIO_SUPPORTED(ha) ((ha)->device_type & DT_ZIO_SUPPORTED) diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 691783abfb6..aa69486dc06 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -537,6 +537,11 @@ struct sts_entry_24xx { /* * If DIF Error is set in comp_status, these additional fields are * defined: + * + * !!! NOTE: Firmware sends expected/actual DIF data in big endian + * format; but all of the "data" field gets swab32-d in the beginning + * of qla2x00_status_entry(). + * * &data[10] : uint8_t report_runt_bg[2]; - computed guard * &data[12] : uint8_t actual_dif[8]; - DIF Data received * &data[20] : uint8_t expected_dif[8]; - DIF Data computed diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index def694271bf..37da04d3db2 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3838,15 +3838,12 @@ qla2x00_loop_resync(scsi_qla_host_t *vha) req = vha->req; rsp = req->rsp; - atomic_set(&vha->loop_state, LOOP_UPDATE); clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags); if (vha->flags.online) { if (!(rval = qla2x00_fw_ready(vha))) { /* Wait at most MAX_TARGET RSCNs for a stable link. */ wait_time = 256; do { - atomic_set(&vha->loop_state, LOOP_UPDATE); - /* Issue a marker after FW becomes ready. 
*/ qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL); diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index d2e904bc21c..9902834e0b7 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -102,3 +102,32 @@ qla2x00_set_fcport_state(fc_port_t *fcport, int state) fcport->d_id.b.al_pa); } } + +static inline int +qla2x00_hba_err_chk_enabled(srb_t *sp) +{ + /* + * Uncomment when corresponding SCSI changes are done. + * + if (!sp->cmd->prot_chk) + return 0; + * + */ + + switch (scsi_get_prot_op(sp->cmd)) { + case SCSI_PROT_READ_STRIP: + case SCSI_PROT_WRITE_INSERT: + if (ql2xenablehba_err_chk >= 1) + return 1; + break; + case SCSI_PROT_READ_PASS: + case SCSI_PROT_WRITE_PASS: + if (ql2xenablehba_err_chk >= 2) + return 1; + break; + case SCSI_PROT_READ_INSERT: + case SCSI_PROT_WRITE_STRIP: + return 1; + } + return 0; +} diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 49d6906af88..dbec89622a0 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -709,20 +709,28 @@ struct fw_dif_context { * */ static inline void -qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt, +qla24xx_set_t10dif_tags(srb_t *sp, struct fw_dif_context *pkt, unsigned int protcnt) { - struct sd_dif_tuple *spt; + struct scsi_cmnd *cmd = sp->cmd; scsi_qla_host_t *vha = shost_priv(cmd->device->host); - unsigned char op = scsi_get_prot_op(cmd); switch (scsi_get_prot_type(cmd)) { - /* For TYPE 0 protection: no checking */ case SCSI_PROT_DIF_TYPE0: - pkt->ref_tag_mask[0] = 0x00; - pkt->ref_tag_mask[1] = 0x00; - pkt->ref_tag_mask[2] = 0x00; - pkt->ref_tag_mask[3] = 0x00; + /* + * No check for ql2xenablehba_err_chk, as it would be an + * I/O error if hba tag generation is not done. 
+ */ + pkt->ref_tag = cpu_to_le32((uint32_t) + (0xffffffff & scsi_get_lba(cmd))); + + if (!qla2x00_hba_err_chk_enabled(sp)) + break; + + pkt->ref_tag_mask[0] = 0xff; + pkt->ref_tag_mask[1] = 0xff; + pkt->ref_tag_mask[2] = 0xff; + pkt->ref_tag_mask[3] = 0xff; break; /* @@ -730,20 +738,16 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt, * match LBA in CDB + N */ case SCSI_PROT_DIF_TYPE2: - if (!ql2xenablehba_err_chk) - break; - - if (scsi_prot_sg_count(cmd)) { - spt = page_address(sg_page(scsi_prot_sglist(cmd))) + - scsi_prot_sglist(cmd)[0].offset; - pkt->app_tag = swab32(spt->app_tag); - pkt->app_tag_mask[0] = 0xff; - pkt->app_tag_mask[1] = 0xff; - } + pkt->app_tag = __constant_cpu_to_le16(0); + pkt->app_tag_mask[0] = 0x0; + pkt->app_tag_mask[1] = 0x0; pkt->ref_tag = cpu_to_le32((uint32_t) (0xffffffff & scsi_get_lba(cmd))); + if (!qla2x00_hba_err_chk_enabled(sp)) + break; + /* enable ALL bytes of the ref tag */ pkt->ref_tag_mask[0] = 0xff; pkt->ref_tag_mask[1] = 0xff; @@ -763,26 +767,15 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt, * 16 bit app tag. 
*/ case SCSI_PROT_DIF_TYPE1: - if (!ql2xenablehba_err_chk) + pkt->ref_tag = cpu_to_le32((uint32_t) + (0xffffffff & scsi_get_lba(cmd))); + pkt->app_tag = __constant_cpu_to_le16(0); + pkt->app_tag_mask[0] = 0x0; + pkt->app_tag_mask[1] = 0x0; + + if (!qla2x00_hba_err_chk_enabled(sp)) break; - if (protcnt && (op == SCSI_PROT_WRITE_STRIP || - op == SCSI_PROT_WRITE_PASS)) { - spt = page_address(sg_page(scsi_prot_sglist(cmd))) + - scsi_prot_sglist(cmd)[0].offset; - ql_dbg(ql_dbg_io, vha, 0x3008, - "LBA from user %p, lba = 0x%x for cmd=%p.\n", - spt, (int)spt->ref_tag, cmd); - pkt->ref_tag = swab32(spt->ref_tag); - pkt->app_tag_mask[0] = 0x0; - pkt->app_tag_mask[1] = 0x0; - } else { - pkt->ref_tag = cpu_to_le32((uint32_t) - (0xffffffff & scsi_get_lba(cmd))); - pkt->app_tag = __constant_cpu_to_le16(0); - pkt->app_tag_mask[0] = 0x0; - pkt->app_tag_mask[1] = 0x0; - } /* enable ALL bytes of the ref tag */ pkt->ref_tag_mask[0] = 0xff; pkt->ref_tag_mask[1] = 0xff; @@ -798,8 +791,162 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt, scsi_get_prot_type(cmd), cmd); } +struct qla2_sgx { + dma_addr_t dma_addr; /* OUT */ + uint32_t dma_len; /* OUT */ + + uint32_t tot_bytes; /* IN */ + struct scatterlist *cur_sg; /* IN */ + + /* for book keeping, bzero on initial invocation */ + uint32_t bytes_consumed; + uint32_t num_bytes; + uint32_t tot_partial; + + /* for debugging */ + uint32_t num_sg; + srb_t *sp; +}; static int +qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx, + uint32_t *partial) +{ + struct scatterlist *sg; + uint32_t cumulative_partial, sg_len; + dma_addr_t sg_dma_addr; + + if (sgx->num_bytes == sgx->tot_bytes) + return 0; + + sg = sgx->cur_sg; + cumulative_partial = sgx->tot_partial; + + sg_dma_addr = sg_dma_address(sg); + sg_len = sg_dma_len(sg); + + sgx->dma_addr = sg_dma_addr + sgx->bytes_consumed; + + if ((cumulative_partial + (sg_len - sgx->bytes_consumed)) >= blk_sz) { + sgx->dma_len = (blk_sz - cumulative_partial); + 
sgx->tot_partial = 0; + sgx->num_bytes += blk_sz; + *partial = 0; + } else { + sgx->dma_len = sg_len - sgx->bytes_consumed; + sgx->tot_partial += sgx->dma_len; + *partial = 1; + } + + sgx->bytes_consumed += sgx->dma_len; + + if (sg_len == sgx->bytes_consumed) { + sg = sg_next(sg); + sgx->num_sg++; + sgx->cur_sg = sg; + sgx->bytes_consumed = 0; + } + + return 1; +} + +static int +qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, + uint32_t *dsd, uint16_t tot_dsds) +{ + void *next_dsd; + uint8_t avail_dsds = 0; + uint32_t dsd_list_len; + struct dsd_dma *dsd_ptr; + struct scatterlist *sg_prot; + uint32_t *cur_dsd = dsd; + uint16_t used_dsds = tot_dsds; + + uint32_t prot_int; + uint32_t partial; + struct qla2_sgx sgx; + dma_addr_t sle_dma; + uint32_t sle_dma_len, tot_prot_dma_len = 0; + struct scsi_cmnd *cmd = sp->cmd; + + prot_int = cmd->device->sector_size; + + memset(&sgx, 0, sizeof(struct qla2_sgx)); + sgx.tot_bytes = scsi_bufflen(sp->cmd); + sgx.cur_sg = scsi_sglist(sp->cmd); + sgx.sp = sp; + + sg_prot = scsi_prot_sglist(sp->cmd); + + while (qla24xx_get_one_block_sg(prot_int, &sgx, &partial)) { + + sle_dma = sgx.dma_addr; + sle_dma_len = sgx.dma_len; +alloc_and_fill: + /* Allocate additional continuation packets? */ + if (avail_dsds == 0) { + avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ? 
+ QLA_DSDS_PER_IOCB : used_dsds; + dsd_list_len = (avail_dsds + 1) * 12; + used_dsds -= avail_dsds; + + /* allocate tracking DS */ + dsd_ptr = kzalloc(sizeof(struct dsd_dma), GFP_ATOMIC); + if (!dsd_ptr) + return 1; + + /* allocate new list */ + dsd_ptr->dsd_addr = next_dsd = + dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC, + &dsd_ptr->dsd_list_dma); + + if (!next_dsd) { + /* + * Need to cleanup only this dsd_ptr, rest + * will be done by sp_free_dma() + */ + kfree(dsd_ptr); + return 1; + } + + list_add_tail(&dsd_ptr->list, + &((struct crc_context *)sp->ctx)->dsd_list); + + sp->flags |= SRB_CRC_CTX_DSD_VALID; + + /* add new list to cmd iocb or last list */ + *cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma)); + *cur_dsd++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma)); + *cur_dsd++ = dsd_list_len; + cur_dsd = (uint32_t *)next_dsd; + } + *cur_dsd++ = cpu_to_le32(LSD(sle_dma)); + *cur_dsd++ = cpu_to_le32(MSD(sle_dma)); + *cur_dsd++ = cpu_to_le32(sle_dma_len); + avail_dsds--; + + if (partial == 0) { + /* Got a full protection interval */ + sle_dma = sg_dma_address(sg_prot) + tot_prot_dma_len; + sle_dma_len = 8; + + tot_prot_dma_len += sle_dma_len; + if (tot_prot_dma_len == sg_dma_len(sg_prot)) { + tot_prot_dma_len = 0; + sg_prot = sg_next(sg_prot); + } + + partial = 1; /* So as to not re-enter this block */ + goto alloc_and_fill; + } + } + /* Null termination */ + *cur_dsd++ = 0; + *cur_dsd++ = 0; + *cur_dsd++ = 0; + return 0; +} +static int qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, uint16_t tot_dsds) { @@ -981,7 +1128,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, struct scsi_cmnd *cmd; struct scatterlist *cur_seg; int sgc; - uint32_t total_bytes; + uint32_t total_bytes = 0; uint32_t data_bytes; uint32_t dif_bytes; uint8_t bundling = 1; @@ -1023,8 +1170,10 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, __constant_cpu_to_le16(CF_READ_DATA); } - tot_prot_dsds = 
scsi_prot_sg_count(cmd); - if (!tot_prot_dsds) + if ((scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_INSERT) || + (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_STRIP) || + (scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_STRIP) || + (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_INSERT)) bundling = 0; /* Allocate CRC context from global pool */ @@ -1047,7 +1196,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, INIT_LIST_HEAD(&crc_ctx_pkt->dsd_list); - qla24xx_set_t10dif_tags(cmd, (struct fw_dif_context *) + qla24xx_set_t10dif_tags(sp, (struct fw_dif_context *) &crc_ctx_pkt->ref_tag, tot_prot_dsds); cmd_pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma)); @@ -1076,7 +1225,6 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, fcp_cmnd->additional_cdb_len |= 2; int_to_scsilun(sp->cmd->device->lun, &fcp_cmnd->lun); - host_to_fcp_swap((uint8_t *)&fcp_cmnd->lun, sizeof(fcp_cmnd->lun)); memcpy(fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len); cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(fcp_cmnd_len); cmd_pkt->fcp_cmnd_dseg_address[0] = cpu_to_le32( @@ -1107,15 +1255,28 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */ /* Compute dif len and adjust data len to incude protection */ - total_bytes = data_bytes; dif_bytes = 0; blk_size = cmd->device->sector_size; - if (scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL) { - dif_bytes = (data_bytes / blk_size) * 8; - total_bytes += dif_bytes; + dif_bytes = (data_bytes / blk_size) * 8; + + switch (scsi_get_prot_op(sp->cmd)) { + case SCSI_PROT_READ_INSERT: + case SCSI_PROT_WRITE_STRIP: + total_bytes = data_bytes; + data_bytes += dif_bytes; + break; + + case SCSI_PROT_READ_STRIP: + case SCSI_PROT_WRITE_INSERT: + case SCSI_PROT_READ_PASS: + case SCSI_PROT_WRITE_PASS: + total_bytes = data_bytes + dif_bytes; + break; + default: + BUG(); } - if (!ql2xenablehba_err_chk) + if 
(!qla2x00_hba_err_chk_enabled(sp)) fw_prot_opts |= 0x10; /* Disable Guard tag checking */ if (!bundling) { @@ -1151,7 +1312,12 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, cmd_pkt->control_flags |= __constant_cpu_to_le16(CF_DATA_SEG_DESCR_ENABLE); - if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd, + + if (!bundling && tot_prot_dsds) { + if (qla24xx_walk_and_build_sglist_no_difb(ha, sp, + cur_dsd, tot_dsds)) + goto crc_queuing_error; + } else if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd, (tot_dsds - tot_prot_dsds))) goto crc_queuing_error; @@ -1414,6 +1580,22 @@ qla24xx_dif_start_scsi(srb_t *sp) goto queuing_error; else sp->flags |= SRB_DMA_VALID; + + if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) || + (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) { + struct qla2_sgx sgx; + uint32_t partial; + + memset(&sgx, 0, sizeof(struct qla2_sgx)); + sgx.tot_bytes = scsi_bufflen(cmd); + sgx.cur_sg = scsi_sglist(cmd); + sgx.sp = sp; + + nseg = 0; + while (qla24xx_get_one_block_sg( + cmd->device->sector_size, &sgx, &partial)) + nseg++; + } } else nseg = 0; @@ -1428,6 +1610,11 @@ qla24xx_dif_start_scsi(srb_t *sp) goto queuing_error; else sp->flags |= SRB_CRC_PROT_DMA_VALID; + + if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) || + (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) { + nseg = scsi_bufflen(cmd) / cmd->device->sector_size; + } } else { nseg = 0; } @@ -1454,6 +1641,7 @@ qla24xx_dif_start_scsi(srb_t *sp) /* Build header part of command packet (excluding the OPCODE). 
*/ req->current_outstanding_cmd = handle; req->outstanding_cmds[handle] = sp; + sp->handle = handle; sp->cmd->host_scribble = (unsigned char *)(unsigned long)handle; req->cnt -= req_cnt; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index b16b7725dee..8a7591f035e 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -719,7 +719,6 @@ skip_rio: vha->flags.rscn_queue_overflow = 1; } - atomic_set(&vha->loop_state, LOOP_UPDATE); atomic_set(&vha->loop_down_timer, 0); vha->flags.management_server_logged_in = 0; @@ -1435,25 +1434,27 @@ struct scsi_dif_tuple { * ASC/ASCQ fields in the sense buffer with ILLEGAL_REQUEST * to indicate to the kernel that the HBA detected error. */ -static inline void +static inline int qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) { struct scsi_qla_host *vha = sp->fcport->vha; struct scsi_cmnd *cmd = sp->cmd; - struct scsi_dif_tuple *ep = - (struct scsi_dif_tuple *)&sts24->data[20]; - struct scsi_dif_tuple *ap = - (struct scsi_dif_tuple *)&sts24->data[12]; + uint8_t *ap = &sts24->data[12]; + uint8_t *ep = &sts24->data[20]; uint32_t e_ref_tag, a_ref_tag; uint16_t e_app_tag, a_app_tag; uint16_t e_guard, a_guard; - e_ref_tag = be32_to_cpu(ep->ref_tag); - a_ref_tag = be32_to_cpu(ap->ref_tag); - e_app_tag = be16_to_cpu(ep->app_tag); - a_app_tag = be16_to_cpu(ap->app_tag); - e_guard = be16_to_cpu(ep->guard); - a_guard = be16_to_cpu(ap->guard); + /* + * swab32 of the "data" field in the beginning of qla2x00_status_entry() + * would make guard field appear at offset 2 + */ + a_guard = le16_to_cpu(*(uint16_t *)(ap + 2)); + a_app_tag = le16_to_cpu(*(uint16_t *)(ap + 0)); + a_ref_tag = le32_to_cpu(*(uint32_t *)(ap + 4)); + e_guard = le16_to_cpu(*(uint16_t *)(ep + 2)); + e_app_tag = le16_to_cpu(*(uint16_t *)(ep + 0)); + e_ref_tag = le32_to_cpu(*(uint32_t *)(ep + 4)); ql_dbg(ql_dbg_io, vha, 0x3023, "iocb(s) %p Returned STATUS.\n", sts24); @@ -1465,6 +1466,63 @@ 
qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) cmd->cmnd[0], (u64)scsi_get_lba(cmd), a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard); + /* + * Ignore sector if: + * For type 3: ref & app tag is all 'f's + * For type 0,1,2: app tag is all 'f's + */ + if ((a_app_tag == 0xffff) && + ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) || + (a_ref_tag == 0xffffffff))) { + uint32_t blocks_done, resid; + sector_t lba_s = scsi_get_lba(cmd); + + /* 2TB boundary case covered automatically with this */ + blocks_done = e_ref_tag - (uint32_t)lba_s + 1; + + resid = scsi_bufflen(cmd) - (blocks_done * + cmd->device->sector_size); + + scsi_set_resid(cmd, resid); + cmd->result = DID_OK << 16; + + /* Update protection tag */ + if (scsi_prot_sg_count(cmd)) { + uint32_t i, j = 0, k = 0, num_ent; + struct scatterlist *sg; + struct sd_dif_tuple *spt; + + /* Patch the corresponding protection tags */ + scsi_for_each_prot_sg(cmd, sg, + scsi_prot_sg_count(cmd), i) { + num_ent = sg_dma_len(sg) / 8; + if (k + num_ent < blocks_done) { + k += num_ent; + continue; + } + j = blocks_done - k - 1; + k = blocks_done; + break; + } + + if (k != blocks_done) { + qla_printk(KERN_WARNING, sp->fcport->vha->hw, + "unexpected tag values tag:lba=%x:%llx)\n", + e_ref_tag, (unsigned long long)lba_s); + return 1; + } + + spt = page_address(sg_page(sg)) + sg->offset; + spt += j; + + spt->app_tag = 0xffff; + if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3) + spt->ref_tag = 0xffffffff; + } + + return 0; + } + /* check guard */ if (e_guard != a_guard) { scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST, @@ -1472,28 +1530,30 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) set_driver_byte(cmd, DRIVER_SENSE); set_host_byte(cmd, DID_ABORT); cmd->result |= SAM_STAT_CHECK_CONDITION << 1; - return; + return 1; } - /* check appl tag */ - if (e_app_tag != a_app_tag) { + /* check ref tag */ + if (e_ref_tag != a_ref_tag) { scsi_build_sense_buffer(1, 
cmd->sense_buffer, ILLEGAL_REQUEST, - 0x10, 0x2); + 0x10, 0x3); set_driver_byte(cmd, DRIVER_SENSE); set_host_byte(cmd, DID_ABORT); cmd->result |= SAM_STAT_CHECK_CONDITION << 1; - return; + return 1; } - /* check ref tag */ - if (e_ref_tag != a_ref_tag) { + /* check appl tag */ + if (e_app_tag != a_app_tag) { scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST, - 0x10, 0x3); + 0x10, 0x2); set_driver_byte(cmd, DRIVER_SENSE); set_host_byte(cmd, DID_ABORT); cmd->result |= SAM_STAT_CHECK_CONDITION << 1; - return; + return 1; } + + return 1; } /** @@ -1767,7 +1827,7 @@ check_scsi_status: break; case CS_DIF_ERROR: - qla2x00_handle_dif_error(sp, sts24); + logit = qla2x00_handle_dif_error(sp, sts24); break; default: cp->result = DID_ERROR << 16; @@ -2468,11 +2528,10 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp) goto skip_msi; } - if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX || - !QLA_MSIX_FW_MODE_1(ha->fw_attributes))) { + if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX)) { ql_log(ql_log_warn, vha, 0x0035, "MSI-X; Unsupported ISP2432 (0x%X, 0x%X).\n", - ha->pdev->revision, ha->fw_attributes); + ha->pdev->revision, QLA_MSIX_CHIP_REV_24XX); goto skip_msix; } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index c706ed37000..f488cc69fc7 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -472,7 +472,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport) host->can_queue = base_vha->req->length + 128; host->this_id = 255; host->cmd_per_lun = 3; - if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) host->max_cmd_len = 32; else host->max_cmd_len = MAX_CMDSZ; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 5cbf33a50b1..049807cda41 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -2208,6 +2208,7 @@ qla82xx_msix_rsp_q(int irq, void 
*dev_id) struct qla_hw_data *ha; struct rsp_que *rsp; struct device_reg_82xx __iomem *reg; + unsigned long flags; rsp = (struct rsp_que *) dev_id; if (!rsp) { @@ -2218,11 +2219,11 @@ qla82xx_msix_rsp_q(int irq, void *dev_id) ha = rsp->hw; reg = &ha->iobase->isp82; - spin_lock_irq(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); qla24xx_process_response_queue(vha, rsp); WRT_REG_DWORD(®->host_int, 0); - spin_unlock_irq(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return IRQ_HANDLED; } @@ -2838,6 +2839,16 @@ sufficient_dsds: int_to_scsilun(sp->cmd->device->lun, &cmd_pkt->lun); host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun)); + /* build FCP_CMND IU */ + memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd)); + int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun); + ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len; + + if (cmd->sc_data_direction == DMA_TO_DEVICE) + ctx->fcp_cmnd->additional_cdb_len |= 1; + else if (cmd->sc_data_direction == DMA_FROM_DEVICE) + ctx->fcp_cmnd->additional_cdb_len |= 2; + /* * Update tagged queuing modifier -- default is TSK_SIMPLE (0). 
*/ @@ -2854,16 +2865,6 @@ sufficient_dsds: } } - /* build FCP_CMND IU */ - memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd)); - int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun); - ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len; - - if (cmd->sc_data_direction == DMA_TO_DEVICE) - ctx->fcp_cmnd->additional_cdb_len |= 1; - else if (cmd->sc_data_direction == DMA_FROM_DEVICE) - ctx->fcp_cmnd->additional_cdb_len |= 2; - memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len); fcp_dl = (uint32_t *)(ctx->fcp_cmnd->cdb + 16 + diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e02df276804..1e69527f1e4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -106,17 +106,21 @@ MODULE_PARM_DESC(ql2xmaxqdepth, "Maximum queue depth to report for target devices."); /* Do not change the value of this after module load */ -int ql2xenabledif = 1; +int ql2xenabledif = 0; module_param(ql2xenabledif, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xenabledif, " Enable T10-CRC-DIF " - " Default is 0 - No DIF Support. 1 - Enable it"); + " Default is 0 - No DIF Support. 
1 - Enable it" + ", 2 - Enable DIF for all types, except Type 0."); -int ql2xenablehba_err_chk; +int ql2xenablehba_err_chk = 2; module_param(ql2xenablehba_err_chk, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xenablehba_err_chk, - " Enable T10-CRC-DIF Error isolation by HBA" - " Default is 0 - Error isolation disabled, 1 - Enable it"); + " Enable T10-CRC-DIF Error isolation by HBA:\n" + " Default is 1.\n" + " 0 -- Error isolation disabled\n" + " 1 -- Error isolation enabled only for DIX Type 0\n" + " 2 -- Error isolation enabled for all Types\n"); int ql2xiidmaenable=1; module_param(ql2xiidmaenable, int, S_IRUGO); @@ -909,7 +913,14 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) "Abort command mbx success.\n"); wait = 1; } + + spin_lock_irqsave(&ha->hardware_lock, flags); qla2x00_sp_compl(ha, sp); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + /* Did the command return during mailbox execution? */ + if (ret == FAILED && !CMD_SP(cmd)) + ret = SUCCESS; /* Wait for the command to be returned. 
*/ if (wait) { @@ -1317,10 +1328,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) qla2x00_sp_compl(ha, sp); } else { ctx = sp->ctx; - if (ctx->type == SRB_LOGIN_CMD || - ctx->type == SRB_LOGOUT_CMD) { - ctx->u.iocb_cmd->free(sp); - } else { + if (ctx->type == SRB_ELS_CMD_RPT || + ctx->type == SRB_ELS_CMD_HST || + ctx->type == SRB_CT_CMD) { struct fc_bsg_job *bsg_job = ctx->u.bsg_job; if (bsg_job->request->msgcode @@ -1332,6 +1342,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) kfree(sp->ctx); mempool_free(sp, ha->srb_mempool); + } else { + ctx->u.iocb_cmd->free(sp); } } } @@ -2251,7 +2263,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->this_id = 255; host->cmd_per_lun = 3; host->unique_id = host->host_no; - if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) host->max_cmd_len = 32; else host->max_cmd_len = MAX_CMDSZ; @@ -2378,13 +2390,16 @@ skip_dpc: "Detected hba at address=%p.\n", ha); - if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) { + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) { + int prot = 0; base_vha->flags.difdix_supported = 1; ql_dbg(ql_dbg_init, base_vha, 0x00f1, "Registering for DIF/DIX type 1 and 3 protection.\n"); + if (ql2xenabledif == 1) + prot = SHOST_DIX_TYPE0_PROTECTION; scsi_host_set_prot(host, - SHOST_DIF_TYPE1_PROTECTION + prot | SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 062c97bf62f..13b6357c1fa 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.03.07.03-k" +#define QLA2XXX_VERSION "8.03.07.07-k" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 3 diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 
d2407558773..24cacff5778 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -825,6 +825,9 @@ static void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi) { struct device *dev = mspi->dev; + if (!(mspi->flags & SPI_CPM_MODE)) + return; + dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE); dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE); cpm_muram_free(cpm_muram_offset(mspi->tx_bd)); diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 8ac6542aedc..fa594d604ac 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -786,9 +786,11 @@ static int __devinit spi_imx_probe(struct platform_device *pdev) int cs_gpio = of_get_named_gpio(np, "cs-gpios", i); if (cs_gpio < 0) cs_gpio = mxc_platform_info->chipselect[i]; + + spi_imx->chipselect[i] = cs_gpio; if (cs_gpio < 0) continue; - spi_imx->chipselect[i] = cs_gpio; + ret = gpio_request(spi_imx->chipselect[i], DRIVER_NAME); if (ret) { while (i > 0) { diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 1d23f383186..6a80749391d 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -50,6 +50,8 @@ #define PCH_RX_THOLD 7 #define PCH_RX_THOLD_MAX 15 +#define PCH_TX_THOLD 2 + #define PCH_MAX_BAUDRATE 5000000 #define PCH_MAX_FIFO_DEPTH 16 @@ -58,6 +60,7 @@ #define PCH_SLEEP_TIME 10 #define SSN_LOW 0x02U +#define SSN_HIGH 0x03U #define SSN_NO_CONTROL 0x00U #define PCH_MAX_CS 0xFF #define PCI_DEVICE_ID_GE_SPI 0x8816 @@ -316,16 +319,19 @@ static void pch_spi_handler_sub(struct pch_spi_data *data, u32 reg_spsr_val, /* if transfer complete interrupt */ if (reg_spsr_val & SPSR_FI_BIT) { - if (tx_index < bpw_len) + if ((tx_index == bpw_len) && (rx_index == tx_index)) { + /* disable interrupts */ + pch_spi_setclr_reg(data->master, PCH_SPCR, 0, PCH_ALL); + + /* transfer is completed; + inform pch_spi_process_messages */ + data->transfer_complete = true; + data->transfer_active = false; + 
wake_up(&data->wait); + } else { dev_err(&data->master->dev, "%s : Transfer is not completed", __func__); - /* disable interrupts */ - pch_spi_setclr_reg(data->master, PCH_SPCR, 0, PCH_ALL); - - /* transfer is completed;inform pch_spi_process_messages */ - data->transfer_complete = true; - data->transfer_active = false; - wake_up(&data->wait); + } } } @@ -348,16 +354,26 @@ static irqreturn_t pch_spi_handler(int irq, void *dev_id) "%s returning due to suspend\n", __func__); return IRQ_NONE; } - if (data->use_dma) - return IRQ_NONE; io_remap_addr = data->io_remap_addr; spsr = io_remap_addr + PCH_SPSR; reg_spsr_val = ioread32(spsr); - if (reg_spsr_val & SPSR_ORF_BIT) - dev_err(&board_dat->pdev->dev, "%s Over run error", __func__); + if (reg_spsr_val & SPSR_ORF_BIT) { + dev_err(&board_dat->pdev->dev, "%s Over run error\n", __func__); + if (data->current_msg->complete != 0) { + data->transfer_complete = true; + data->current_msg->status = -EIO; + data->current_msg->complete(data->current_msg->context); + data->bcurrent_msg_processing = false; + data->current_msg = NULL; + data->cur_trans = NULL; + } + } + + if (data->use_dma) + return IRQ_NONE; /* Check if the interrupt is for SPI device */ if (reg_spsr_val & (SPSR_FI_BIT | SPSR_RFI_BIT)) { @@ -756,10 +772,6 @@ static void pch_spi_set_ir(struct pch_spi_data *data) wait_event_interruptible(data->wait, data->transfer_complete); - pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL); - dev_dbg(&data->master->dev, - "%s:no more control over SSN-writing 0 to SSNXCR.", __func__); - /* clear all interrupts */ pch_spi_writereg(data->master, PCH_SPSR, pch_spi_readreg(data->master, PCH_SPSR)); @@ -815,10 +827,11 @@ static void pch_spi_copy_rx_data_for_dma(struct pch_spi_data *data, int bpw) } } -static void pch_spi_start_transfer(struct pch_spi_data *data) +static int pch_spi_start_transfer(struct pch_spi_data *data) { struct pch_spi_dma_ctrl *dma; unsigned long flags; + int rtn; dma = &data->dma; @@ -833,19 +846,23 @@ 
static void pch_spi_start_transfer(struct pch_spi_data *data) initiating the transfer. */ dev_dbg(&data->master->dev, "%s:waiting for transfer to get over\n", __func__); - wait_event_interruptible(data->wait, data->transfer_complete); + rtn = wait_event_interruptible_timeout(data->wait, + data->transfer_complete, + msecs_to_jiffies(2 * HZ)); dma_sync_sg_for_cpu(&data->master->dev, dma->sg_rx_p, dma->nent, DMA_FROM_DEVICE); + + dma_sync_sg_for_cpu(&data->master->dev, dma->sg_tx_p, dma->nent, + DMA_FROM_DEVICE); + memset(data->dma.tx_buf_virt, 0, PAGE_SIZE); + async_tx_ack(dma->desc_rx); async_tx_ack(dma->desc_tx); kfree(dma->sg_tx_p); kfree(dma->sg_rx_p); spin_lock_irqsave(&data->lock, flags); - pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL); - dev_dbg(&data->master->dev, - "%s:no more control over SSN-writing 0 to SSNXCR.", __func__); /* clear fifo threshold, disable interrupts, disable SPI transfer */ pch_spi_setclr_reg(data->master, PCH_SPCR, 0, @@ -858,6 +875,8 @@ static void pch_spi_start_transfer(struct pch_spi_data *data) pch_spi_clear_fifo(data->master); spin_unlock_irqrestore(&data->lock, flags); + + return rtn; } static void pch_dma_rx_complete(void *arg) @@ -1023,8 +1042,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) /* set receive fifo threshold and transmit fifo threshold */ pch_spi_setclr_reg(data->master, PCH_SPCR, ((size - 1) << SPCR_RFIC_FIELD) | - ((PCH_MAX_FIFO_DEPTH - PCH_DMA_TRANS_SIZE) << - SPCR_TFIC_FIELD), + (PCH_TX_THOLD << SPCR_TFIC_FIELD), MASK_RFIC_SPCR_BITS | MASK_TFIC_SPCR_BITS); spin_unlock_irqrestore(&data->lock, flags); @@ -1035,13 +1053,20 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) /* offset, length setting */ sg = dma->sg_rx_p; for (i = 0; i < num; i++, sg++) { - if (i == 0) { - sg->offset = 0; + if (i == (num - 2)) { + sg->offset = size * i; + sg->offset = sg->offset * (*bpw / 8); sg_set_page(sg, virt_to_page(dma->rx_buf_virt), rem, sg->offset); sg_dma_len(sg) = 
rem; + } else if (i == (num - 1)) { + sg->offset = size * (i - 1) + rem; + sg->offset = sg->offset * (*bpw / 8); + sg_set_page(sg, virt_to_page(dma->rx_buf_virt), size, + sg->offset); + sg_dma_len(sg) = size; } else { - sg->offset = rem + size * (i - 1); + sg->offset = size * i; sg->offset = sg->offset * (*bpw / 8); sg_set_page(sg, virt_to_page(dma->rx_buf_virt), size, sg->offset); @@ -1065,6 +1090,16 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) dma->desc_rx = desc_rx; /* TX */ + if (data->bpw_len > PCH_DMA_TRANS_SIZE) { + num = data->bpw_len / PCH_DMA_TRANS_SIZE; + size = PCH_DMA_TRANS_SIZE; + rem = 16; + } else { + num = 1; + size = data->bpw_len; + rem = data->bpw_len; + } + dma->sg_tx_p = kzalloc(sizeof(struct scatterlist)*num, GFP_ATOMIC); sg_init_table(dma->sg_tx_p, num); /* Initialize SG table */ /* offset, length setting */ @@ -1162,6 +1197,7 @@ static void pch_spi_process_messages(struct work_struct *pwork) if (data->use_dma) pch_spi_request_dma(data, data->current_msg->spi->bits_per_word); + pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL); do { /* If we are already processing a message get the next transfer structure from the message otherwise retrieve @@ -1184,7 +1220,8 @@ static void pch_spi_process_messages(struct work_struct *pwork) if (data->use_dma) { pch_spi_handle_dma(data, &bpw); - pch_spi_start_transfer(data); + if (!pch_spi_start_transfer(data)) + goto out; pch_spi_copy_rx_data_for_dma(data, bpw); } else { pch_spi_set_tx(data, &bpw); @@ -1222,6 +1259,8 @@ static void pch_spi_process_messages(struct work_struct *pwork) } while (data->cur_trans != NULL); +out: + pch_spi_writereg(data->master, PCH_SSNXCR, SSN_HIGH); if (data->use_dma) pch_spi_release_dma(data); } diff --git a/drivers/staging/comedi/drivers/ni_labpc.c b/drivers/staging/comedi/drivers/ni_labpc.c index 6859af0778c..7611def97d0 100644 --- a/drivers/staging/comedi/drivers/ni_labpc.c +++ b/drivers/staging/comedi/drivers/ni_labpc.c @@ -241,8 +241,10 
@@ static int labpc_eeprom_write_insn(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data); static void labpc_adc_timing(struct comedi_device *dev, struct comedi_cmd *cmd); -#ifdef CONFIG_COMEDI_PCI +#ifdef CONFIG_ISA_DMA_API static unsigned int labpc_suggest_transfer_size(struct comedi_cmd cmd); +#endif +#ifdef CONFIG_COMEDI_PCI static int labpc_find_device(struct comedi_device *dev, int bus, int slot); #endif static int labpc_dio_mem_callback(int dir, int port, int data, diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 1a7c19ae766..8b307b42879 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -411,7 +411,8 @@ static int cvm_oct_napi_poll(struct napi_struct *napi, int budget) skb->protocol = eth_type_trans(skb, dev); skb->dev = dev; - if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc || work->word2.s.L4_error)) + if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc || + work->word2.s.L4_error || !work->word2.s.tcp_or_udp)) skb->ip_summed = CHECKSUM_NONE; else skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index a3f5162bfed..462fbc20561 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1242,7 +1242,7 @@ static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw, int ret = 0; BUG_ON(!is_ephemeral(pool)); - zbud_decompress(virt_to_page(data), pampd); + zbud_decompress((struct page *)(data), pampd); zbud_free_and_delist((struct zbud_hdr *)pampd); atomic_dec(&zcache_curr_eph_pampd_count); return ret; diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 497b2e718a7..5b773160200 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1430,7 +1430,7 @@ static int 
iscsi_enforce_integrity_rules( u8 DataSequenceInOrder = 0; u8 ErrorRecoveryLevel = 0, SessionType = 0; u8 IFMarker = 0, OFMarker = 0; - u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0; + u8 IFMarkInt_Reject = 1, OFMarkInt_Reject = 1; u32 FirstBurstLength = 0, MaxBurstLength = 0; struct iscsi_param *param = NULL; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index a0d23bc0fc9..f00137f377b 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -875,40 +875,6 @@ void iscsit_inc_session_usage_count(struct iscsi_session *sess) } /* - * Used before iscsi_do[rx,tx]_data() to determine iov and [rx,tx]_marker - * array counts needed for sync and steering. - */ -static int iscsit_determine_sync_and_steering_counts( - struct iscsi_conn *conn, - struct iscsi_data_count *count) -{ - u32 length = count->data_length; - u32 marker, markint; - - count->sync_and_steering = 1; - - marker = (count->type == ISCSI_RX_DATA) ? - conn->of_marker : conn->if_marker; - markint = (count->type == ISCSI_RX_DATA) ? - (conn->conn_ops->OFMarkInt * 4) : - (conn->conn_ops->IFMarkInt * 4); - count->ss_iov_count = count->iov_count; - - while (length > 0) { - if (length >= marker) { - count->ss_iov_count += 3; - count->ss_marker_count += 2; - - length -= marker; - marker = markint; - } else - length = 0; - } - - return 0; -} - -/* * Setup conn->if_marker and conn->of_marker values based upon * the initial marker-less interval. 
(see iSCSI v19 A.2) */ @@ -1290,7 +1256,7 @@ int iscsit_fe_sendpage_sg( struct kvec iov; u32 tx_hdr_size, data_len; u32 offset = cmd->first_data_sg_off; - int tx_sent; + int tx_sent, iov_off; send_hdr: tx_hdr_size = ISCSI_HDR_LEN; @@ -1310,9 +1276,19 @@ send_hdr: } data_len = cmd->tx_size - tx_hdr_size - cmd->padding; - if (conn->conn_ops->DataDigest) + /* + * Set iov_off used by padding and data digest tx_data() calls below + * in order to determine proper offset into cmd->iov_data[] + */ + if (conn->conn_ops->DataDigest) { data_len -= ISCSI_CRC_LEN; - + if (cmd->padding) + iov_off = (cmd->iov_data_count - 2); + else + iov_off = (cmd->iov_data_count - 1); + } else { + iov_off = (cmd->iov_data_count - 1); + } /* * Perform sendpage() for each page in the scatterlist */ @@ -1341,8 +1317,7 @@ send_pg: send_padding: if (cmd->padding) { - struct kvec *iov_p = - &cmd->iov_data[cmd->iov_data_count-1]; + struct kvec *iov_p = &cmd->iov_data[iov_off++]; tx_sent = tx_data(conn, iov_p, 1, cmd->padding); if (cmd->padding != tx_sent) { @@ -1356,8 +1331,7 @@ send_padding: send_datacrc: if (conn->conn_ops->DataDigest) { - struct kvec *iov_d = - &cmd->iov_data[cmd->iov_data_count]; + struct kvec *iov_d = &cmd->iov_data[iov_off]; tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN); if (ISCSI_CRC_LEN != tx_sent) { @@ -1431,8 +1405,7 @@ static int iscsit_do_rx_data( struct iscsi_data_count *count) { int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len; - u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0; - struct kvec iov[count->ss_iov_count], *iov_p; + struct kvec *iov_p; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) @@ -1440,93 +1413,8 @@ static int iscsit_do_rx_data( memset(&msg, 0, sizeof(struct msghdr)); - if (count->sync_and_steering) { - int size = 0; - u32 i, orig_iov_count = 0; - u32 orig_iov_len = 0, orig_iov_loc = 0; - u32 iov_count = 0, per_iov_bytes = 0; - u32 *rx_marker, old_rx_marker = 0; - struct kvec *iov_record; - - 
memset(&rx_marker_val, 0, - count->ss_marker_count * sizeof(u32)); - memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec)); - - iov_record = count->iov; - orig_iov_count = count->iov_count; - rx_marker = &conn->of_marker; - - i = 0; - size = data; - orig_iov_len = iov_record[orig_iov_loc].iov_len; - while (size > 0) { - pr_debug("rx_data: #1 orig_iov_len %u," - " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc); - pr_debug("rx_data: #2 rx_marker %u, size" - " %u\n", *rx_marker, size); - - if (orig_iov_len >= *rx_marker) { - iov[iov_count].iov_len = *rx_marker; - iov[iov_count++].iov_base = - (iov_record[orig_iov_loc].iov_base + - per_iov_bytes); - - iov[iov_count].iov_len = (MARKER_SIZE / 2); - iov[iov_count++].iov_base = - &rx_marker_val[rx_marker_iov++]; - iov[iov_count].iov_len = (MARKER_SIZE / 2); - iov[iov_count++].iov_base = - &rx_marker_val[rx_marker_iov++]; - old_rx_marker = *rx_marker; - - /* - * OFMarkInt is in 32-bit words. - */ - *rx_marker = (conn->conn_ops->OFMarkInt * 4); - size -= old_rx_marker; - orig_iov_len -= old_rx_marker; - per_iov_bytes += old_rx_marker; - - pr_debug("rx_data: #3 new_rx_marker" - " %u, size %u\n", *rx_marker, size); - } else { - iov[iov_count].iov_len = orig_iov_len; - iov[iov_count++].iov_base = - (iov_record[orig_iov_loc].iov_base + - per_iov_bytes); - - per_iov_bytes = 0; - *rx_marker -= orig_iov_len; - size -= orig_iov_len; - - if (size) - orig_iov_len = - iov_record[++orig_iov_loc].iov_len; - - pr_debug("rx_data: #4 new_rx_marker" - " %u, size %u\n", *rx_marker, size); - } - } - data += (rx_marker_iov * (MARKER_SIZE / 2)); - - iov_p = &iov[0]; - iov_len = iov_count; - - if (iov_count > count->ss_iov_count) { - pr_err("iov_count: %d, count->ss_iov_count:" - " %d\n", iov_count, count->ss_iov_count); - return -1; - } - if (rx_marker_iov > count->ss_marker_count) { - pr_err("rx_marker_iov: %d, count->ss_marker" - "_count: %d\n", rx_marker_iov, - count->ss_marker_count); - return -1; - } - } else { - iov_p = count->iov; - 
iov_len = count->iov_count; - } + iov_p = count->iov; + iov_len = count->iov_count; while (total_rx < data) { rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len, @@ -1541,16 +1429,6 @@ static int iscsit_do_rx_data( rx_loop, total_rx, data); } - if (count->sync_and_steering) { - int j; - for (j = 0; j < rx_marker_iov; j++) { - pr_debug("rx_data: #5 j: %d, offset: %d\n", - j, rx_marker_val[j]); - conn->of_marker_offset = rx_marker_val[j]; - } - total_rx -= (rx_marker_iov * (MARKER_SIZE / 2)); - } - return total_rx; } @@ -1559,8 +1437,7 @@ static int iscsit_do_tx_data( struct iscsi_data_count *count) { int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; - u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0; - struct kvec iov[count->ss_iov_count], *iov_p; + struct kvec *iov_p; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) @@ -1573,98 +1450,8 @@ static int iscsit_do_tx_data( memset(&msg, 0, sizeof(struct msghdr)); - if (count->sync_and_steering) { - int size = 0; - u32 i, orig_iov_count = 0; - u32 orig_iov_len = 0, orig_iov_loc = 0; - u32 iov_count = 0, per_iov_bytes = 0; - u32 *tx_marker, old_tx_marker = 0; - struct kvec *iov_record; - - memset(&tx_marker_val, 0, - count->ss_marker_count * sizeof(u32)); - memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec)); - - iov_record = count->iov; - orig_iov_count = count->iov_count; - tx_marker = &conn->if_marker; - - i = 0; - size = data; - orig_iov_len = iov_record[orig_iov_loc].iov_len; - while (size > 0) { - pr_debug("tx_data: #1 orig_iov_len %u," - " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc); - pr_debug("tx_data: #2 tx_marker %u, size" - " %u\n", *tx_marker, size); - - if (orig_iov_len >= *tx_marker) { - iov[iov_count].iov_len = *tx_marker; - iov[iov_count++].iov_base = - (iov_record[orig_iov_loc].iov_base + - per_iov_bytes); - - tx_marker_val[tx_marker_iov] = - (size - *tx_marker); - iov[iov_count].iov_len = (MARKER_SIZE / 2); - iov[iov_count++].iov_base = - 
&tx_marker_val[tx_marker_iov++]; - iov[iov_count].iov_len = (MARKER_SIZE / 2); - iov[iov_count++].iov_base = - &tx_marker_val[tx_marker_iov++]; - old_tx_marker = *tx_marker; - - /* - * IFMarkInt is in 32-bit words. - */ - *tx_marker = (conn->conn_ops->IFMarkInt * 4); - size -= old_tx_marker; - orig_iov_len -= old_tx_marker; - per_iov_bytes += old_tx_marker; - - pr_debug("tx_data: #3 new_tx_marker" - " %u, size %u\n", *tx_marker, size); - pr_debug("tx_data: #4 offset %u\n", - tx_marker_val[tx_marker_iov-1]); - } else { - iov[iov_count].iov_len = orig_iov_len; - iov[iov_count++].iov_base - = (iov_record[orig_iov_loc].iov_base + - per_iov_bytes); - - per_iov_bytes = 0; - *tx_marker -= orig_iov_len; - size -= orig_iov_len; - - if (size) - orig_iov_len = - iov_record[++orig_iov_loc].iov_len; - - pr_debug("tx_data: #5 new_tx_marker" - " %u, size %u\n", *tx_marker, size); - } - } - - data += (tx_marker_iov * (MARKER_SIZE / 2)); - - iov_p = &iov[0]; - iov_len = iov_count; - - if (iov_count > count->ss_iov_count) { - pr_err("iov_count: %d, count->ss_iov_count:" - " %d\n", iov_count, count->ss_iov_count); - return -1; - } - if (tx_marker_iov > count->ss_marker_count) { - pr_err("tx_marker_iov: %d, count->ss_marker" - "_count: %d\n", tx_marker_iov, - count->ss_marker_count); - return -1; - } - } else { - iov_p = count->iov; - iov_len = count->iov_count; - } + iov_p = count->iov; + iov_len = count->iov_count; while (total_tx < data) { tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, @@ -1679,9 +1466,6 @@ static int iscsit_do_tx_data( tx_loop, total_tx, data); } - if (count->sync_and_steering) - total_tx -= (tx_marker_iov * (MARKER_SIZE / 2)); - return total_tx; } @@ -1702,12 +1486,6 @@ int rx_data( c.data_length = data; c.type = ISCSI_RX_DATA; - if (conn->conn_ops->OFMarker && - (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { - if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) - return -1; - } - return iscsit_do_rx_data(conn, &c); } @@ -1728,12 +1506,6 
@@ int tx_data( c.data_length = data; c.type = ISCSI_TX_DATA; - if (conn->conn_ops->IFMarker && - (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { - if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) - return -1; - } - return iscsit_do_tx_data(conn, &c); } diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 89ae923c5da..f04d4ef99dc 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -24,6 +24,7 @@ */ #include <linux/kernel.h> +#include <linux/ctype.h> #include <asm/unaligned.h> #include <scsi/scsi.h> @@ -154,6 +155,37 @@ target_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf) return 0; } +static void +target_parse_naa_6h_vendor_specific(struct se_device *dev, unsigned char *buf_off) +{ + unsigned char *p = &dev->se_sub_dev->t10_wwn.unit_serial[0]; + unsigned char *buf = buf_off; + int cnt = 0, next = 1; + /* + * Generate up to 36 bits of VENDOR SPECIFIC IDENTIFIER starting on + * byte 3 bit 3-0 for NAA IEEE Registered Extended DESIGNATOR field + * format, followed by 64 bits of VENDOR SPECIFIC IDENTIFIER EXTENSION + * to complete the payload. These are based from VPD=0x80 PRODUCT SERIAL + * NUMBER set via vpd_unit_serial in target_core_configfs.c to ensure + * per device uniqeness. + */ + while (*p != '\0') { + if (cnt >= 13) + break; + if (!isxdigit(*p)) { + p++; + continue; + } + if (next != 0) { + buf[cnt++] |= hex_to_bin(*p++); + next = 0; + } else { + buf[cnt] = hex_to_bin(*p++) << 4; + next = 1; + } + } +} + /* * Device identification VPD, for a complete list of * DESIGNATOR TYPEs see spc4r17 Table 459. 
@@ -219,8 +251,7 @@ target_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf) * VENDOR_SPECIFIC_IDENTIFIER and * VENDOR_SPECIFIC_IDENTIFIER_EXTENTION */ - buf[off++] |= hex_to_bin(dev->se_sub_dev->t10_wwn.unit_serial[0]); - hex2bin(&buf[off], &dev->se_sub_dev->t10_wwn.unit_serial[1], 12); + target_parse_naa_6h_vendor_specific(dev, &buf[off]); len = 20; off = (len + 4); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 8d0c58ea631..a4b0a8d27f2 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -977,15 +977,17 @@ static void target_qf_do_work(struct work_struct *work) { struct se_device *dev = container_of(work, struct se_device, qf_work_queue); + LIST_HEAD(qf_cmd_list); struct se_cmd *cmd, *cmd_tmp; spin_lock_irq(&dev->qf_cmd_lock); - list_for_each_entry_safe(cmd, cmd_tmp, &dev->qf_cmd_list, se_qf_node) { + list_splice_init(&dev->qf_cmd_list, &qf_cmd_list); + spin_unlock_irq(&dev->qf_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) { list_del(&cmd->se_qf_node); atomic_dec(&dev->dev_qf_count); smp_mb__after_atomic_dec(); - spin_unlock_irq(&dev->qf_cmd_lock); pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue" " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd, @@ -997,10 +999,7 @@ static void target_qf_do_work(struct work_struct *work) * has been added to head of queue */ transport_add_cmd_to_queue(cmd, cmd->t_state); - - spin_lock_irq(&dev->qf_cmd_lock); } - spin_unlock_irq(&dev->qf_cmd_lock); } unsigned char *transport_dump_cmd_direction(struct se_cmd *cmd) diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index bd4fe21a23b..3749d8b4b42 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -98,8 +98,7 @@ struct ft_tpg { struct list_head list; /* linkage in ft_lport_acl tpg_list */ struct list_head lun_list; /* head of LUNs */ struct se_portal_group se_tpg; - struct 
task_struct *thread; /* processing thread */ - struct se_queue_obj qobj; /* queue for processing thread */ + struct workqueue_struct *workqueue; }; struct ft_lport_acl { @@ -110,16 +109,10 @@ struct ft_lport_acl { struct se_wwn fc_lport_wwn; }; -enum ft_cmd_state { - FC_CMD_ST_NEW = 0, - FC_CMD_ST_REJ -}; - /* * Commands */ struct ft_cmd { - enum ft_cmd_state state; u32 lun; /* LUN from request */ struct ft_sess *sess; /* session held for cmd */ struct fc_seq *seq; /* sequence in exchange mgr */ @@ -127,7 +120,7 @@ struct ft_cmd { struct fc_frame *req_frame; unsigned char *cdb; /* pointer to CDB inside frame */ u32 write_data_len; /* data received on writes */ - struct se_queue_req se_req; + struct work_struct work; /* Local sense buffer */ unsigned char ft_sense_buffer[TRANSPORT_SENSE_BUFFER]; u32 was_ddp_setup:1; /* Set only if ddp is setup */ @@ -177,7 +170,6 @@ int ft_is_state_remove(struct se_cmd *); /* * other internal functions. */ -int ft_thread(void *); void ft_recv_req(struct ft_sess *, struct fc_frame *); struct ft_tpg *ft_lport_find_tpg(struct fc_lport *); struct ft_node_acl *ft_acl_get(struct ft_tpg *, struct fc_rport_priv *); diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 5654dc22f7a..80fbcde00cb 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -62,8 +62,8 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller) int count; se_cmd = &cmd->se_cmd; - pr_debug("%s: cmd %p state %d sess %p seq %p se_cmd %p\n", - caller, cmd, cmd->state, cmd->sess, cmd->seq, se_cmd); + pr_debug("%s: cmd %p sess %p seq %p se_cmd %p\n", + caller, cmd, cmd->sess, cmd->seq, se_cmd); pr_debug("%s: cmd %p cdb %p\n", caller, cmd, cmd->cdb); pr_debug("%s: cmd %p lun %d\n", caller, cmd, cmd->lun); @@ -90,38 +90,6 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller) 16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0); } -static void ft_queue_cmd(struct ft_sess *sess, struct ft_cmd *cmd) -{ - struct ft_tpg *tpg = 
sess->tport->tpg; - struct se_queue_obj *qobj = &tpg->qobj; - unsigned long flags; - - qobj = &sess->tport->tpg->qobj; - spin_lock_irqsave(&qobj->cmd_queue_lock, flags); - list_add_tail(&cmd->se_req.qr_list, &qobj->qobj_list); - atomic_inc(&qobj->queue_cnt); - spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags); - - wake_up_process(tpg->thread); -} - -static struct ft_cmd *ft_dequeue_cmd(struct se_queue_obj *qobj) -{ - unsigned long flags; - struct se_queue_req *qr; - - spin_lock_irqsave(&qobj->cmd_queue_lock, flags); - if (list_empty(&qobj->qobj_list)) { - spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags); - return NULL; - } - qr = list_first_entry(&qobj->qobj_list, struct se_queue_req, qr_list); - list_del(&qr->qr_list); - atomic_dec(&qobj->queue_cnt); - spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags); - return container_of(qr, struct ft_cmd, se_req); -} - static void ft_free_cmd(struct ft_cmd *cmd) { struct fc_frame *fp; @@ -282,9 +250,7 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd) int ft_get_cmd_state(struct se_cmd *se_cmd) { - struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); - - return cmd->state; + return 0; } int ft_is_state_remove(struct se_cmd *se_cmd) @@ -505,6 +471,8 @@ int ft_queue_tm_resp(struct se_cmd *se_cmd) return 0; } +static void ft_send_work(struct work_struct *work); + /* * Handle incoming FCP command. */ @@ -523,7 +491,9 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp) goto busy; } cmd->req_frame = fp; /* hold frame during cmd */ - ft_queue_cmd(sess, cmd); + + INIT_WORK(&cmd->work, ft_send_work); + queue_work(sess->tport->tpg->workqueue, &cmd->work); return; busy: @@ -563,12 +533,13 @@ void ft_recv_req(struct ft_sess *sess, struct fc_frame *fp) /* * Send new command to target. 
*/ -static void ft_send_cmd(struct ft_cmd *cmd) +static void ft_send_work(struct work_struct *work) { + struct ft_cmd *cmd = container_of(work, struct ft_cmd, work); struct fc_frame_header *fh = fc_frame_header_get(cmd->req_frame); struct se_cmd *se_cmd; struct fcp_cmnd *fcp; - int data_dir; + int data_dir = 0; u32 data_len; int task_attr; int ret; @@ -675,42 +646,3 @@ static void ft_send_cmd(struct ft_cmd *cmd) err: ft_send_resp_code_and_free(cmd, FCP_CMND_FIELDS_INVALID); } - -/* - * Handle request in the command thread. - */ -static void ft_exec_req(struct ft_cmd *cmd) -{ - pr_debug("cmd state %x\n", cmd->state); - switch (cmd->state) { - case FC_CMD_ST_NEW: - ft_send_cmd(cmd); - break; - default: - break; - } -} - -/* - * Processing thread. - * Currently one thread per tpg. - */ -int ft_thread(void *arg) -{ - struct ft_tpg *tpg = arg; - struct se_queue_obj *qobj = &tpg->qobj; - struct ft_cmd *cmd; - - while (!kthread_should_stop()) { - schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT); - if (kthread_should_stop()) - goto out; - - cmd = ft_dequeue_cmd(qobj); - if (cmd) - ft_exec_req(cmd); - } - -out: - return 0; -} diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index b15879d43e2..8fa39b74f22 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -327,7 +327,6 @@ static struct se_portal_group *ft_add_tpg( tpg->index = index; tpg->lport_acl = lacl; INIT_LIST_HEAD(&tpg->lun_list); - transport_init_queue_obj(&tpg->qobj); ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -336,8 +335,8 @@ static struct se_portal_group *ft_add_tpg( return NULL; } - tpg->thread = kthread_run(ft_thread, tpg, "ft_tpg%lu", index); - if (IS_ERR(tpg->thread)) { + tpg->workqueue = alloc_workqueue("tcm_fc", 0, 1); + if (!tpg->workqueue) { kfree(tpg); return NULL; } @@ -356,7 +355,7 @@ static void ft_del_tpg(struct se_portal_group *se_tpg) pr_debug("del tpg %s\n", 
config_item_name(&tpg->se_tpg.tpg_group.cg_item)); - kthread_stop(tpg->thread); + destroy_workqueue(tpg->workqueue); /* Wait for sessions to be freed thru RCU, for BUG_ON below */ synchronize_rcu(); diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index c37f4cd9645..d35ea5a3d56 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -219,43 +219,41 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) if (cmd->was_ddp_setup) { BUG_ON(!ep); BUG_ON(!lport); - } - - /* - * Doesn't expect payload if DDP is setup. Payload - * is expected to be copied directly to user buffers - * due to DDP (Large Rx offload), - */ - buf = fc_frame_payload_get(fp, 1); - if (buf) - pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " + /* + * Since DDP (Large Rx offload) was setup for this request, + * payload is expected to be copied directly to user buffers. + */ + buf = fc_frame_payload_get(fp, 1); + if (buf) + pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " "cmd->sg_cnt 0x%x. DDP was setup" " hence not expected to receive frame with " - "payload, Frame will be dropped if " - "'Sequence Initiative' bit in f_ctl is " + "payload, Frame will be dropped if" + "'Sequence Initiative' bit in f_ctl is" "not set\n", __func__, ep->xid, f_ctl, cmd->sg, cmd->sg_cnt); - /* - * Invalidate HW DDP context if it was setup for respective - * command. Invalidation of HW DDP context is requited in both - * situation (success and error). - */ - ft_invl_hw_context(cmd); + /* + * Invalidate HW DDP context if it was setup for respective + * command. Invalidation of HW DDP context is requited in both + * situation (success and error). + */ + ft_invl_hw_context(cmd); - /* - * If "Sequence Initiative (TSI)" bit set in f_ctl, means last - * write data frame is received successfully where payload is - * posted directly to user buffer and only the last frame's - * header is posted in receive queue. 
- * - * If "Sequence Initiative (TSI)" bit is not set, means error - * condition w.r.t. DDP, hence drop the packet and let explict - * ABORTS from other end of exchange timer trigger the recovery. - */ - if (f_ctl & FC_FC_SEQ_INIT) - goto last_frame; - else - goto drop; + /* + * If "Sequence Initiative (TSI)" bit set in f_ctl, means last + * write data frame is received successfully where payload is + * posted directly to user buffer and only the last frame's + * header is posted in receive queue. + * + * If "Sequence Initiative (TSI)" bit is not set, means error + * condition w.r.t. DDP, hence drop the packet and let explict + * ABORTS from other end of exchange timer trigger the recovery. + */ + if (f_ctl & FC_FC_SEQ_INIT) + goto last_frame; + else + goto drop; + } rel_off = ntohl(fh->fh_parm_offset); frame_len = fr_len(fp); diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index 225123b37f1..58be715913c 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -4450,7 +4450,7 @@ static int __init rs_init(void) #if defined(CONFIG_ETRAX_RS485) #if defined(CONFIG_ETRAX_RS485_ON_PA) - if (cris_io_interface_allocate_pins(if_ser0, 'a', rs485_pa_bit, + if (cris_io_interface_allocate_pins(if_serial_0, 'a', rs485_pa_bit, rs485_pa_bit)) { printk(KERN_CRIT "ETRAX100LX serial: Could not allocate " "RS485 pin\n"); @@ -4459,7 +4459,7 @@ static int __init rs_init(void) } #endif #if defined(CONFIG_ETRAX_RS485_ON_PORT_G) - if (cris_io_interface_allocate_pins(if_ser0, 'g', rs485_pa_bit, + if (cris_io_interface_allocate_pins(if_serial_0, 'g', rs485_pa_bit, rs485_port_g_bit)) { printk(KERN_CRIT "ETRAX100LX serial: Could not allocate " "RS485 pin\n"); diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index 58cf279ed87..bc95f52cad8 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -478,8 +478,10 @@ lqasc_set_termios(struct uart_port *port, spin_unlock_irqrestore(<q_asc_lock, flags); /* Don't 
rewrite B0 */ - if (tty_termios_baud_rate(new)) + if (tty_termios_baud_rate(new)) tty_termios_encode_baud_rate(new, baud, baud); + + uart_update_timeout(port, cflag, baud); } static const char* diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1e96d1f1fe6..723f8231193 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -761,7 +761,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) memset(buf, 0, retval); status = 0; - mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC; + mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC; spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. */ diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 54139a2f06c..952e2ded61a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1934,8 +1934,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, int status = -EINPROGRESS; struct urb_priv *urb_priv; struct xhci_ep_ctx *ep_ctx; + struct list_head *tmp; u32 trb_comp_code; int ret = 0; + int td_num = 0; slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); xdev = xhci->devs[slot_id]; @@ -1957,6 +1959,12 @@ static int handle_tx_event(struct xhci_hcd *xhci, return -ENODEV; } + /* Count current td numbers if ep->skip is set */ + if (ep->skip) { + list_for_each(tmp, &ep_ring->td_list) + td_num++; + } + event_dma = le64_to_cpu(event->buffer); trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); /* Look for common error cases */ @@ -2068,7 +2076,18 @@ static int handle_tx_event(struct xhci_hcd *xhci, goto cleanup; } + /* We've skipped all the TDs on the ep ring when ep->skip set */ + if (ep->skip && td_num == 0) { + ep->skip = false; + xhci_dbg(xhci, "All tds on the ep_ring skipped. 
" + "Clear skip flag.\n"); + ret = 0; + goto cleanup; + } + td = list_entry(ep_ring->td_list.next, struct xhci_td, td_list); + if (ep->skip) + td_num--; /* Is this a TRB in the currently executing TD? */ event_seg = trb_in_td(ep_ring->deq_seg, ep_ring->dequeue, diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 410fba45378..809cbda03d7 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -494,15 +494,16 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, asminline_call(&cmn_regs, cru_rom_addr); die_nmi_called = 1; spin_unlock_irqrestore(&rom_lock, rom_pl); + + if (allow_kdump) + hpwdt_stop(); + if (!is_icru) { if (cmn_regs.u1.ral == 0) { - printk(KERN_WARNING "hpwdt: An NMI occurred, " + panic("An NMI occurred, " "but unable to determine source.\n"); } } - - if (allow_kdump) - hpwdt_stop(); panic("An NMI occurred, please see the Integrated " "Management Log for details.\n"); diff --git a/drivers/watchdog/lantiq_wdt.c b/drivers/watchdog/lantiq_wdt.c index 7d82adac1cb..102aed0efbf 100644 --- a/drivers/watchdog/lantiq_wdt.c +++ b/drivers/watchdog/lantiq_wdt.c @@ -51,16 +51,16 @@ static int ltq_wdt_ok_to_close; static void ltq_wdt_enable(void) { - ltq_wdt_timeout = ltq_wdt_timeout * + unsigned long int timeout = ltq_wdt_timeout * (ltq_io_region_clk_rate / LTQ_WDT_DIVIDER) + 0x1000; - if (ltq_wdt_timeout > LTQ_MAX_TIMEOUT) - ltq_wdt_timeout = LTQ_MAX_TIMEOUT; + if (timeout > LTQ_MAX_TIMEOUT) + timeout = LTQ_MAX_TIMEOUT; /* write the first password magic */ ltq_w32(LTQ_WDT_PW1, ltq_wdt_membase + LTQ_WDT_CR); /* write the second magic plus the configuration and new timeout */ ltq_w32(LTQ_WDT_SR_EN | LTQ_WDT_SR_PWD | LTQ_WDT_SR_CLKDIV | - LTQ_WDT_PW2 | ltq_wdt_timeout, ltq_wdt_membase + LTQ_WDT_CR); + LTQ_WDT_PW2 | timeout, ltq_wdt_membase + LTQ_WDT_CR); } static void diff --git a/drivers/watchdog/sbc_epx_c3.c b/drivers/watchdog/sbc_epx_c3.c index 3066a5127ca..eaca366b723 100644 --- 
a/drivers/watchdog/sbc_epx_c3.c +++ b/drivers/watchdog/sbc_epx_c3.c @@ -173,7 +173,7 @@ static struct notifier_block epx_c3_notifier = { .notifier_call = epx_c3_notify_sys, }; -static const char banner[] __initdata = KERN_INFO PFX +static const char banner[] __initconst = KERN_INFO PFX "Hardware Watchdog Timer for Winsystems EPX-C3 SBC: 0.1\n"; static int __init watchdog_init(void) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index d33520d0b4c..1199da0f98c 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -59,7 +59,7 @@ static struct watchdog_device *wdd; static int watchdog_ping(struct watchdog_device *wddev) { - if (test_bit(WDOG_ACTIVE, &wdd->status)) { + if (test_bit(WDOG_ACTIVE, &wddev->status)) { if (wddev->ops->ping) return wddev->ops->ping(wddev); /* ping the watchdog */ else @@ -81,12 +81,12 @@ static int watchdog_start(struct watchdog_device *wddev) { int err; - if (!test_bit(WDOG_ACTIVE, &wdd->status)) { + if (!test_bit(WDOG_ACTIVE, &wddev->status)) { err = wddev->ops->start(wddev); if (err < 0) return err; - set_bit(WDOG_ACTIVE, &wdd->status); + set_bit(WDOG_ACTIVE, &wddev->status); } return 0; } @@ -105,18 +105,18 @@ static int watchdog_stop(struct watchdog_device *wddev) { int err = -EBUSY; - if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) { + if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) { pr_info("%s: nowayout prevents watchdog to be stopped!\n", - wdd->info->identity); + wddev->info->identity); return err; } - if (test_bit(WDOG_ACTIVE, &wdd->status)) { + if (test_bit(WDOG_ACTIVE, &wddev->status)) { err = wddev->ops->stop(wddev); if (err < 0) return err; - clear_bit(WDOG_ACTIVE, &wdd->status); + clear_bit(WDOG_ACTIVE, &wddev->status); } return 0; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index da70f5c32eb..7523719bf8a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -54,7 +54,7 @@ * This lock protects updates to the following mapping and 
reference-count * arrays. The lock does not need to be acquired to read the mapping tables. */ -static DEFINE_SPINLOCK(irq_mapping_update_lock); +static DEFINE_MUTEX(irq_mapping_update_lock); static LIST_HEAD(xen_irq_list_head); @@ -631,7 +631,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int irq = -1; struct physdev_irq irq_op; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = find_irq_by_gsi(gsi); if (irq != -1) { @@ -684,7 +684,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, handle_edge_irq, name); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -710,7 +710,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, { int irq, ret; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = xen_allocate_irq_dynamic(); if (irq == -1) @@ -724,10 +724,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (ret < 0) goto error_irq; out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; error_irq: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); xen_free_irq(irq); return -1; } @@ -740,7 +740,7 @@ int xen_destroy_irq(int irq) struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); desc = irq_to_desc(irq); if (!desc) @@ -766,7 +766,7 @@ int xen_destroy_irq(int irq) xen_free_irq(irq); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return rc; } @@ -776,7 +776,7 @@ int xen_irq_from_pirq(unsigned pirq) struct irq_info *info; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); list_for_each_entry(info, &xen_irq_list_head, list) { if (info == NULL || info->type != IRQT_PIRQ) @@ -787,7 +787,7 @@ int xen_irq_from_pirq(unsigned pirq) } irq = -1; out: - 
spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -802,7 +802,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) { int irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = evtchn_to_irq[evtchn]; @@ -818,7 +818,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -829,7 +829,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) struct evtchn_bind_ipi bind_ipi; int evtchn, irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(ipi_to_irq, cpu)[ipi]; @@ -853,7 +853,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -878,7 +878,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) struct evtchn_bind_virq bind_virq; int evtchn, irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(virq_to_irq, cpu)[virq]; @@ -903,7 +903,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -913,7 +913,7 @@ static void unbind_from_irq(unsigned int irq) struct evtchn_close close; int evtchn = evtchn_from_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); if (VALID_EVTCHN(evtchn)) { close.port = evtchn; @@ -943,7 +943,7 @@ static void unbind_from_irq(unsigned int irq) xen_free_irq(irq); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); } int bind_evtchn_to_irqhandler(unsigned int evtchn, @@ -1279,7 +1279,7 @@ void rebind_evtchn_irq(int evtchn, int irq) will also be masked. 
*/ disable_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ BUG_ON(evtchn_to_irq[evtchn] != -1); @@ -1289,7 +1289,7 @@ void rebind_evtchn_irq(int evtchn, int irq) xen_irq_info_evtchn_init(irq, evtchn); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ irq_set_affinity(irq, cpumask_of(0)); diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c index e0c2807b097..181fa8158a8 100644 --- a/drivers/zorro/zorro.c +++ b/drivers/zorro/zorro.c @@ -148,10 +148,10 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, bus); - /* Register all devices */ pr_info("Zorro: Probing AutoConfig expansion devices: %u device%s\n", zorro_num_autocon, zorro_num_autocon == 1 ? "" : "s"); + /* First identify all devices ... */ for (i = 0; i < zorro_num_autocon; i++) { z = &zorro_autocon[i]; z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8); @@ -172,6 +172,11 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) dev_set_name(&z->dev, "%02x", i); z->dev.parent = &bus->dev; z->dev.bus = &zorro_bus_type; + } + + /* ... 
then register them */ + for (i = 0; i < zorro_num_autocon; i++) { + z = &zorro_autocon[i]; error = device_register(&z->dev); if (error) { dev_err(&bus->dev, "Error registering device %s\n", diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 89b6ce3634f..c0ddfd29c5e 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ - compression.o delayed-ref.o relocation.o delayed-inode.o backref.o \ - scrub.o + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index eb159aaa5a1..89b156d85d6 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -59,22 +59,19 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) if (!value) return ERR_PTR(-ENOMEM); size = __btrfs_getxattr(inode, name, value, size); - if (size > 0) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) { - kfree(value); - return acl; - } - set_cached_acl(inode, type, acl); - } - kfree(value); + } + if (size > 0) { + acl = posix_acl_from_xattr(value, size); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? 
I think nobody */ acl = NULL; - set_cached_acl(inode, type, acl); } else { acl = ERR_PTR(-EIO); } + kfree(value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); return acl; } diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d9f99a16edd..5a5d325a393 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -103,11 +103,6 @@ struct btrfs_inode { */ u64 delalloc_bytes; - /* total number of bytes that may be used for this inode for - * delalloc - */ - u64 reserved_bytes; - /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk @@ -115,9 +110,6 @@ struct btrfs_inode { */ u64 disk_i_size; - /* flags field from the on disk inode */ - u32 flags; - /* * if this is a directory then index_cnt is the counter for the index * number for new files that are created @@ -132,6 +124,15 @@ struct btrfs_inode { u64 last_unlink_trans; /* + * Number of bytes outstanding that are going to need csums. This is + * used in ENOSPC accounting. + */ + u64 csum_bytes; + + /* flags field from the on disk inode */ + u32 flags; + + /* * Counters to keep track of the number of extent item's we may use due * to delalloc and such. 
outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8ec5d86f173..14f1c5a0b2d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -85,7 +85,8 @@ struct compressed_bio { static inline int compressed_bio_size(struct btrfs_root *root, unsigned long disk_size) { - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + return sizeof(struct compressed_bio) + ((disk_size + root->sectorsize - 1) / root->sectorsize) * csum_size; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 011cab3aca8..0fe615e4ea3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -902,9 +902,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } /* * deal with the case where there is only one pointer in the root @@ -1107,9 +1108,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, mid = path->nodes[level]; WARN_ON(btrfs_header_generation(mid) != trans->transid); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } if (!parent) return 1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03912c5c6f4..b9ba59ff929 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -30,6 +30,7 @@ #include <linux/kobject.h> #include <trace/events/btrfs.h> #include <asm/kmap_types.h> +#include <linux/pagemap.h> #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -360,6 +361,47 @@ struct btrfs_header { #define BTRFS_LABEL_SIZE 256 /* + * just in case we somehow lose the 
roots and are not able to mount, + * we store an array of the roots from previous transactions + * in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unsed_64[4]; + + u8 tree_root_level; + u8 chunk_root_level; + u8 extent_root_level; + u8 fs_root_level; + u8 dev_root_level; + u8 csum_root_level; + /* future and to align */ + u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ @@ -405,6 +447,7 @@ struct btrfs_super_block { /* future expansion */ __le64 reserved[31]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; } __attribute__ ((__packed__)); /* @@ -772,14 +815,8 @@ struct btrfs_space_info { struct btrfs_block_rsv { u64 size; u64 reserved; - u64 freed[2]; struct btrfs_space_info *space_info; - struct list_head list; spinlock_t lock; - atomic_t usage; - unsigned int priority:8; - unsigned int durable:1; - unsigned int refill_used:1; unsigned int full:1; }; @@ -840,10 +877,10 @@ struct btrfs_block_group_cache { spinlock_t lock; u64 pinned; u64 reserved; - u64 reserved_pinned; u64 bytes_super; u64 flags; u64 sectorsize; + u64 cache_generation; unsigned int ro:1; unsigned int dirty:1; unsigned int iref:1; @@ -899,6 +936,10 @@ struct btrfs_fs_info { spinlock_t block_group_cache_lock; struct rb_root block_group_cache_tree; + /* keep track of unallocated space */ + spinlock_t free_chunk_lock; + u64 free_chunk_space; + struct extent_io_tree freed_extents[2]; struct extent_io_tree *pinned_extents; @@ -916,14 
+957,11 @@ struct btrfs_fs_info { struct btrfs_block_rsv trans_block_rsv; /* block reservation for chunk tree */ struct btrfs_block_rsv chunk_block_rsv; + /* block reservation for delayed operations */ + struct btrfs_block_rsv delayed_block_rsv; struct btrfs_block_rsv empty_block_rsv; - /* list of block reservations that cross multiple transactions */ - struct list_head durable_block_rsv_list; - - struct mutex durable_block_rsv_mutex; - u64 generation; u64 last_trans_committed; @@ -942,8 +980,8 @@ struct btrfs_fs_info { wait_queue_head_t transaction_blocked_wait; wait_queue_head_t async_submit_wait; - struct btrfs_super_block super_copy; - struct btrfs_super_block super_for_commit; + struct btrfs_super_block *super_copy; + struct btrfs_super_block *super_for_commit; struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; @@ -1036,6 +1074,7 @@ struct btrfs_fs_info { struct btrfs_workers endio_freespace_worker; struct btrfs_workers submit_workers; struct btrfs_workers caching_workers; + struct btrfs_workers readahead_workers; /* * fixup workers take dirty pages that didn't properly go through @@ -1119,6 +1158,13 @@ struct btrfs_fs_info { u64 fs_state; struct btrfs_delayed_root *delayed_root; + + /* readahead tree */ + spinlock_t reada_lock; + struct radix_tree_root reada_tree; + + /* next backup root to be overwritten */ + int backup_root_index; }; /* @@ -1363,6 +1409,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) +#define BTRFS_MOUNT_RECOVERY (1 << 18) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -1978,6 +2025,55 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root) return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; } +/* struct btrfs_root_backup */ +BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, + 
tree_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, + tree_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, + tree_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, + chunk_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, + chunk_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, + extent_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, + fs_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, + fs_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, + fs_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, + dev_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, + dev_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, + dev_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, + csum_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, + csum_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, + csum_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, + num_devices, 64); + /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct 
btrfs_super_block, bytenr, 64); @@ -2129,6 +2225,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); } +static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) +{ + return mapping_gfp_mask(mapping) & ~__GFP_FS; +} + /* extent-tree.c */ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) @@ -2137,6 +2238,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 3 * num_items; } +/* + * Doing a truncate won't result in new nodes or leaves, just what we need for + * COW. + */ +static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, + unsigned num_items) +{ + return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + num_items; +} + void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); @@ -2146,6 +2258,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_bytes, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, u64 bytenr); @@ -2196,8 +2311,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); +int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, + u64 start, u64 len); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct 
btrfs_trans_handle *trans, @@ -2240,25 +2355,23 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv); -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv); -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes); +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor); +int btrfs_block_rsv_refill(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor); + u64 min_reserved); int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, struct btrfs_block_rsv *dst_rsv, u64 num_bytes); void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv); int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, @@ -2379,6 +2492,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) smp_mb(); return fs_info->closing; } +static inline void free_fs_info(struct btrfs_fs_info *fs_info) +{ + kfree(fs_info->delayed_root); + kfree(fs_info->extent_root); + kfree(fs_info->tree_root); + kfree(fs_info->chunk_root); + kfree(fs_info->dev_root); + kfree(fs_info->csum_root); + kfree(fs_info->super_copy); + kfree(fs_info->super_for_commit); + kfree(fs_info); +} /* root-item.c */ int btrfs_find_root_ref(struct 
btrfs_root *tree_root, @@ -2579,11 +2704,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve); -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); @@ -2697,4 +2817,20 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct btrfs_scrub_progress *progress); +/* reada.c */ +struct reada_control { + struct btrfs_root *root; /* tree to prefetch */ + struct btrfs_key key_start; + struct btrfs_key key_end; /* exclusive */ + atomic_t elems; + struct kref refcnt; + wait_queue_head_t wait; +}; +struct reada_control *btrfs_reada_add(struct btrfs_root *root, + struct btrfs_key *start, struct btrfs_key *end); +int btrfs_reada_wait(void *handle); +void btrfs_reada_detach(void *handle); +int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, + u64 start, int err); + #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b52c672f4c1..bbe8496d533 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -591,7 +591,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, return 0; src_rsv = trans->block_rsv; - dst_rsv = &root->fs_info->global_block_rsv; + dst_rsv = &root->fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(root, 1); ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); @@ -609,7 +609,7 @@ static void 
btrfs_delayed_item_release_metadata(struct btrfs_root *root, if (!item->bytes_reserved) return; - rsv = &root->fs_info->global_block_rsv; + rsv = &root->fs_info->delayed_block_rsv; btrfs_block_rsv_release(root, rsv, item->bytes_reserved); } @@ -624,13 +624,36 @@ static int btrfs_delayed_inode_reserve_metadata( u64 num_bytes; int ret; - if (!trans->bytes_reserved) - return 0; - src_rsv = trans->block_rsv; - dst_rsv = &root->fs_info->global_block_rsv; + dst_rsv = &root->fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(root, 1); + + /* + * btrfs_dirty_inode will update the inode under btrfs_join_transaction + * which doesn't reserve space for speed. This is a problem since we + * still need to reserve space for this update, so try to reserve the + * space. + * + * Now if src_rsv == delalloc_block_rsv we'll let it just steal since + * we're accounted for. + */ + if (!trans->bytes_reserved && + src_rsv != &root->fs_info->delalloc_block_rsv) { + ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); + /* + * Since we're under a transaction reserve_metadata_bytes could + * try to commit the transaction which will make it return + * EAGAIN to make us stop the transaction we have, so return + * ENOSPC instead so that btrfs_dirty_inode knows what to do. 
+ */ + if (ret == -EAGAIN) + ret = -ENOSPC; + if (!ret) + node->bytes_reserved = num_bytes; + return ret; + } + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); if (!ret) node->bytes_reserved = num_bytes; @@ -646,7 +669,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root, if (!node->bytes_reserved) return; - rsv = &root->fs_info->global_block_rsv; + rsv = &root->fs_info->delayed_block_rsv; btrfs_block_rsv_release(root, rsv, node->bytes_reserved); node->bytes_reserved = 0; @@ -1026,7 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, path->leave_spinning = 1; block_rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->global_block_rsv; + trans->block_rsv = &root->fs_info->delayed_block_rsv; delayed_root = btrfs_get_delayed_root(root); @@ -1069,7 +1092,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, path->leave_spinning = 1; block_rsv = trans->block_rsv; - trans->block_rsv = &node->root->fs_info->global_block_rsv; + trans->block_rsv = &node->root->fs_info->delayed_block_rsv; ret = btrfs_insert_delayed_items(trans, path, node->root, node); if (!ret) @@ -1149,7 +1172,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) goto free_path; block_rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->global_block_rsv; + trans->block_rsv = &root->fs_info->delayed_block_rsv; ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); if (!ret) @@ -1686,11 +1709,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, } ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); - /* - * we must reserve enough space when we start a new transaction, - * so reserving metadata failure is impossible - */ - BUG_ON(ret); + if (ret) + goto release_node; fill_stack_inode_item(trans, &delayed_node->inode_item, inode); delayed_node->inode_dirty = 1; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 
dc034380253..0eb1f095125 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result) static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); char *result = NULL; unsigned long len; unsigned long cur_len; @@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { - ret = read_extent_buffer_pages(io_tree, eb, start, 1, + ret = read_extent_buffer_pages(io_tree, eb, start, + WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) @@ -608,11 +608,47 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; err: + if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { + clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); + btree_readahead_hook(root, eb, eb->start, ret); + } + free_extent_buffer(eb); out: return ret; } +static int btree_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + u64 mirror_num, struct extent_state *state) +{ + struct extent_io_tree *tree; + unsigned long len; + struct extent_buffer *eb; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + + len = page->private >> 2; + WARN_ON(len == 0); + + eb = alloc_extent_buffer(tree, start, len, page); + if (eb == NULL) + goto out; + + if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { + clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); + btree_readahead_hook(root, eb, eb->start, -EIO); + } + +out: + return 
-EIO; /* we fixed nothing */ +} + static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; @@ -974,11 +1010,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0, btree_get_extent, 0); + buf, 0, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); return ret; } +int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, + int mirror_num, struct extent_buffer **eb) +{ + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; + int ret; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return 0; + + set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); + + ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, + btree_get_extent, mirror_num); + if (ret) { + free_extent_buffer(buf); + return ret; + } + + if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { + free_extent_buffer(buf); + return -EIO; + } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { + *eb = buf; + } else { + free_extent_buffer(buf); + } + return 0; +} + struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -1135,10 +1203,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->commit_root = NULL; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { free_extent_buffer(root->node); + root->node = NULL; return -EIO; } root->commit_root = btrfs_root_node(root); @@ -1577,6 +1647,228 @@ sleep: return 0; } +/* + * this will find the highest generation in the array of + * root 
backups. The index of the highest array is returned, + * or -1 if we can't find anything. + * + * We check to make sure the array is valid by comparing the + * generation of the latest root in the array with the generation + * in the super block. If they don't match we pitch it. + */ +static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen) +{ + u64 cur; + int newest_index = -1; + struct btrfs_root_backup *root_backup; + int i; + + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { + root_backup = info->super_copy->super_roots + i; + cur = btrfs_backup_tree_root_gen(root_backup); + if (cur == newest_gen) + newest_index = i; + } + + /* check to see if we actually wrapped around */ + if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) { + root_backup = info->super_copy->super_roots; + cur = btrfs_backup_tree_root_gen(root_backup); + if (cur == newest_gen) + newest_index = 0; + } + return newest_index; +} + + +/* + * find the oldest backup so we know where to store new entries + * in the backup array. This will set the backup_root_index + * field in the fs_info struct + */ +static void find_oldest_super_backup(struct btrfs_fs_info *info, + u64 newest_gen) +{ + int newest_index = -1; + + newest_index = find_newest_super_backup(info, newest_gen); + /* if there was garbage in there, just move along */ + if (newest_index == -1) { + info->backup_root_index = 0; + } else { + info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS; + } +} + +/* + * copy all the root pointers into the super backup array. 
+ * this will bump the backup pointer by one when it is + * done + */ +static void backup_super_roots(struct btrfs_fs_info *info) +{ + int next_backup; + struct btrfs_root_backup *root_backup; + int last_backup; + + next_backup = info->backup_root_index; + last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) % + BTRFS_NUM_BACKUP_ROOTS; + + /* + * just overwrite the last backup if we're at the same generation + * this happens only at umount + */ + root_backup = info->super_for_commit->super_roots + last_backup; + if (btrfs_backup_tree_root_gen(root_backup) == + btrfs_header_generation(info->tree_root->node)) + next_backup = last_backup; + + root_backup = info->super_for_commit->super_roots + next_backup; + + /* + * make sure all of our padding and empty slots get zero filled + * regardless of which ones we use today + */ + memset(root_backup, 0, sizeof(*root_backup)); + + info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; + + btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); + btrfs_set_backup_tree_root_gen(root_backup, + btrfs_header_generation(info->tree_root->node)); + + btrfs_set_backup_tree_root_level(root_backup, + btrfs_header_level(info->tree_root->node)); + + btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); + btrfs_set_backup_chunk_root_gen(root_backup, + btrfs_header_generation(info->chunk_root->node)); + btrfs_set_backup_chunk_root_level(root_backup, + btrfs_header_level(info->chunk_root->node)); + + btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start); + btrfs_set_backup_extent_root_gen(root_backup, + btrfs_header_generation(info->extent_root->node)); + btrfs_set_backup_extent_root_level(root_backup, + btrfs_header_level(info->extent_root->node)); + + btrfs_set_backup_fs_root(root_backup, info->fs_root->node->start); + btrfs_set_backup_fs_root_gen(root_backup, + btrfs_header_generation(info->fs_root->node)); + btrfs_set_backup_fs_root_level(root_backup, + 
btrfs_header_level(info->fs_root->node)); + + btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); + btrfs_set_backup_dev_root_gen(root_backup, + btrfs_header_generation(info->dev_root->node)); + btrfs_set_backup_dev_root_level(root_backup, + btrfs_header_level(info->dev_root->node)); + + btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start); + btrfs_set_backup_csum_root_gen(root_backup, + btrfs_header_generation(info->csum_root->node)); + btrfs_set_backup_csum_root_level(root_backup, + btrfs_header_level(info->csum_root->node)); + + btrfs_set_backup_total_bytes(root_backup, + btrfs_super_total_bytes(info->super_copy)); + btrfs_set_backup_bytes_used(root_backup, + btrfs_super_bytes_used(info->super_copy)); + btrfs_set_backup_num_devices(root_backup, + btrfs_super_num_devices(info->super_copy)); + + /* + * if we don't copy this out to the super_copy, it won't get remembered + * for the next commit + */ + memcpy(&info->super_copy->super_roots, + &info->super_for_commit->super_roots, + sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); +} + +/* + * this copies info out of the root backup array and back into + * the in-memory super block. It is meant to help iterate through + * the array, so you send it the number of backups you've already + * tried and the last backup index you used. 
+ * + * this returns -1 when it has tried all the backups + */ +static noinline int next_root_backup(struct btrfs_fs_info *info, + struct btrfs_super_block *super, + int *num_backups_tried, int *backup_index) +{ + struct btrfs_root_backup *root_backup; + int newest = *backup_index; + + if (*num_backups_tried == 0) { + u64 gen = btrfs_super_generation(super); + + newest = find_newest_super_backup(info, gen); + if (newest == -1) + return -1; + + *backup_index = newest; + *num_backups_tried = 1; + } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) { + /* we've tried all the backups, all done */ + return -1; + } else { + /* jump to the next oldest backup */ + newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) % + BTRFS_NUM_BACKUP_ROOTS; + *backup_index = newest; + *num_backups_tried += 1; + } + root_backup = super->super_roots + newest; + + btrfs_set_super_generation(super, + btrfs_backup_tree_root_gen(root_backup)); + btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); + btrfs_set_super_root_level(super, + btrfs_backup_tree_root_level(root_backup)); + btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); + + /* + * FIXME: the total bytes and num_devices need to match, or else we + * need an fsck + */ + btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); + btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); + return 0; +} + +/* helper to cleanup tree roots */ +static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) +{ + free_extent_buffer(info->tree_root->node); + free_extent_buffer(info->tree_root->commit_root); + free_extent_buffer(info->dev_root->node); + free_extent_buffer(info->dev_root->commit_root); + free_extent_buffer(info->extent_root->node); + free_extent_buffer(info->extent_root->commit_root); + free_extent_buffer(info->csum_root->node); + free_extent_buffer(info->csum_root->commit_root); + + info->tree_root->node = NULL; + info->tree_root->commit_root 
= NULL; + info->dev_root->node = NULL; + info->dev_root->commit_root = NULL; + info->extent_root->node = NULL; + info->extent_root->commit_root = NULL; + info->csum_root->node = NULL; + info->csum_root->commit_root = NULL; + + if (chunk_root) { + free_extent_buffer(info->chunk_root->node); + free_extent_buffer(info->chunk_root->commit_root); + info->chunk_root->node = NULL; + info->chunk_root->commit_root = NULL; + } +} + + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1604,6 +1896,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, int ret; int err = -EINVAL; + int num_backups_tried = 0; + int backup_index = 0; struct btrfs_super_block *disk_super; @@ -1648,6 +1942,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); + spin_lock_init(&fs_info->free_chunk_lock); mutex_init(&fs_info->reloc_mutex); init_completion(&fs_info->kobj_unregister); @@ -1665,8 +1960,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_block_rsv(&fs_info->trans_block_rsv); btrfs_init_block_rsv(&fs_info->chunk_block_rsv); btrfs_init_block_rsv(&fs_info->empty_block_rsv); - INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); - mutex_init(&fs_info->durable_block_rsv_mutex); + btrfs_init_block_rsv(&fs_info->delayed_block_rsv); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); @@ -1677,6 +1971,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->trans_no_join = 0; + fs_info->free_chunk_space = 0; + + /* readahead state */ + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); + spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ 
-1766,14 +2065,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_alloc; } - memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); - memcpy(&fs_info->super_for_commit, &fs_info->super_copy, - sizeof(fs_info->super_for_commit)); + memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); + memcpy(fs_info->super_for_commit, fs_info->super_copy, + sizeof(*fs_info->super_for_commit)); brelse(bh); - memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_alloc; @@ -1783,6 +2082,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); /* + * run through our array of backup supers and setup + * our ring pointer to the oldest one + */ + generation = btrfs_super_generation(disk_super); + find_oldest_super_backup(fs_info, generation); + + /* * In the long term, we'll store the compression type in the super * block, and it'll be used for per file compression control. 
*/ @@ -1870,6 +2176,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", fs_info->thread_pool_size, &fs_info->generic_worker); + btrfs_init_workers(&fs_info->readahead_workers, "readahead", + fs_info->thread_pool_size, + &fs_info->generic_worker); /* * endios are largely parallel and should have a very @@ -1880,6 +2189,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->endio_write_workers.idle_thresh = 2; fs_info->endio_meta_write_workers.idle_thresh = 2; + fs_info->readahead_workers.idle_thresh = 2; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->generic_worker, 1); @@ -1893,6 +2203,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_freespace_worker, 1); btrfs_start_workers(&fs_info->delayed_workers, 1); btrfs_start_workers(&fs_info->caching_workers, 1); + btrfs_start_workers(&fs_info->readahead_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -1939,7 +2250,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", sb->s_id); - goto fail_chunk_root; + goto fail_tree_roots; } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); chunk_root->commit_root = btrfs_root_node(chunk_root); @@ -1954,11 +2265,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (ret) { printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", sb->s_id); - goto fail_chunk_root; + goto fail_tree_roots; } btrfs_close_extra_devices(fs_devices); +retry_root_backup: blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); generation = btrfs_super_generation(disk_super); @@ -1966,32 +2278,33 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->node = read_tree_block(tree_root, 
btrfs_super_root(disk_super), blocksize, generation); - if (!tree_root->node) - goto fail_chunk_root; - if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + if (!tree_root->node || + !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", sb->s_id); - goto fail_tree_root; + + goto recovery_tree_root; } + btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) - goto fail_tree_root; + goto recovery_tree_root; extent_root->track_dirty = 1; ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); if (ret) - goto fail_extent_root; + goto recovery_tree_root; dev_root->track_dirty = 1; ret = find_and_setup_root(tree_root, fs_info, BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) - goto fail_dev_root; + goto recovery_tree_root; csum_root->track_dirty = 1; @@ -2124,20 +2437,10 @@ fail_cleaner: fail_block_groups: btrfs_free_block_groups(fs_info); - free_extent_buffer(csum_root->node); - free_extent_buffer(csum_root->commit_root); -fail_dev_root: - free_extent_buffer(dev_root->node); - free_extent_buffer(dev_root->commit_root); -fail_extent_root: - free_extent_buffer(extent_root->node); - free_extent_buffer(extent_root->commit_root); -fail_tree_root: - free_extent_buffer(tree_root->node); - free_extent_buffer(tree_root->commit_root); -fail_chunk_root: - free_extent_buffer(chunk_root->node); - free_extent_buffer(chunk_root->commit_root); + +fail_tree_roots: + free_root_pointers(fs_info, 1); + fail_sb_buffer: btrfs_stop_workers(&fs_info->generic_worker); btrfs_stop_workers(&fs_info->fixup_workers); @@ -2152,7 +2455,6 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->delayed_workers); btrfs_stop_workers(&fs_info->caching_workers); fail_alloc: - kfree(fs_info->delayed_root); fail_iput: 
invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); @@ -2164,13 +2466,27 @@ fail_bdi: fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: - kfree(extent_root); - kfree(tree_root); - kfree(fs_info); - kfree(chunk_root); - kfree(dev_root); - kfree(csum_root); + free_fs_info(fs_info); return ERR_PTR(err); + +recovery_tree_root: + + if (!btrfs_test_opt(tree_root, RECOVERY)) + goto fail_tree_roots; + + free_root_pointers(fs_info, 0); + + /* don't use the log in recovery mode, it won't be valid */ + btrfs_set_super_log_root(disk_super, 0); + + /* we can't trust the free space cache either */ + btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + + ret = next_root_backup(fs_info, fs_info->super_copy, + &num_backups_tried, &backup_index); + if (ret == -1) + goto fail_block_groups; + goto retry_root_backup; } static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) @@ -2338,10 +2654,11 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; + max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); + backup_super_roots(root->fs_info); - sb = &root->fs_info->super_for_commit; + sb = root->fs_info->super_for_commit; dev_item = &sb->dev_item; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); @@ -2545,8 +2862,6 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(root->fs_info); - btrfs_put_block_group_cache(fs_info); - /* * Here come 2 situations when btrfs is broken to flip readonly: * @@ -2572,6 +2887,8 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + btrfs_put_block_group_cache(fs_info); + kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); @@ -2603,7 +2920,6 @@ int 
close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); iput(fs_info->btree_inode); - kfree(fs_info->delayed_root); btrfs_stop_workers(&fs_info->generic_worker); btrfs_stop_workers(&fs_info->fixup_workers); @@ -2617,6 +2933,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); btrfs_stop_workers(&fs_info->caching_workers); + btrfs_stop_workers(&fs_info->readahead_workers); btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -2624,12 +2941,7 @@ int close_ctree(struct btrfs_root *root) bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); - kfree(fs_info->extent_root); - kfree(fs_info->tree_root); - kfree(fs_info->chunk_root); - kfree(fs_info->dev_root); - kfree(fs_info->csum_root); - kfree(fs_info); + free_fs_info(fs_info); return 0; } @@ -2735,7 +3047,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } -int btree_lock_page_hook(struct page *page) +static int btree_lock_page_hook(struct page *page, void *data, + void (*flush_fn)(void *)) { struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2752,7 +3065,10 @@ int btree_lock_page_hook(struct page *page) if (!eb) goto out; - btrfs_tree_lock(eb); + if (!btrfs_try_tree_write_lock(eb)) { + flush_fn(data); + btrfs_tree_lock(eb); + } btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { @@ -2767,7 +3083,10 @@ int btree_lock_page_hook(struct page *page) btrfs_tree_unlock(eb); free_extent_buffer(eb); out: - lock_page(page); + if (!trylock_page(page)) { + flush_fn(data); + lock_page(page); + } return 0; } @@ -3123,6 +3442,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, + 
.readpage_io_failed_hook = btree_io_failed_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index bec3ea4bd67..c99d0a8f13f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -40,6 +40,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); +int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, + int mirror_num, struct extent_buffer **eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, @@ -83,8 +85,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btree_lock_page_hook(struct page *page); - #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 119f842c1d4..18ea90c8943 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,6 +23,7 @@ #include <linux/rcupdate.h> #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "compat.h" #include "hash.h" #include "ctree.h" @@ -52,6 +53,21 @@ enum { CHUNK_ALLOC_LIMITED = 2, }; +/* + * Control how reservations are dealt with. + * + * RESERVE_FREE - freeing a reservation. 
+ * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for + * ENOSPC accounting + * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update + * bytes_may_use as the ENOSPC accounting is done elsewhere + */ +enum { + RESERVE_FREE = 0, + RESERVE_ALLOC = 1, + RESERVE_ALLOC_NO_ACCOUNT = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); +static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) if (atomic_dec_and_test(&cache->count)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); - WARN_ON(cache->reserved_pinned > 0); kfree(cache->free_space_ctl); kfree(cache); } @@ -465,7 +482,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * we likely hold important locks. 
*/ if (trans && (!trans->transaction->in_commit) && - (root && root != root->fs_info->tree_root)) { + (root && root != root->fs_info->tree_root) && + btrfs_test_opt(root, SPACE_CACHE)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); @@ -2700,6 +2718,13 @@ again: goto again; } + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -2749,12 +2774,15 @@ again: if (!ret) dcs = BTRFS_DC_SETUP; btrfs_free_reserved_data_space(inode, num_pages); + out_put: iput(inode); out_free: btrfs_release_path(path); out: spin_lock(&block_group->lock); + if (!ret) + block_group->cache_generation = trans->transid; block_group->disk_cache_state = dcs; spin_unlock(&block_group->lock); @@ -3122,16 +3150,13 @@ commit_trans: return -ENOSPC; } data_sinfo->bytes_may_use += bytes; - BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); return 0; } /* - * called when we are clearing an delalloc extent from the - * inode's io_tree or there was an error for whatever reason - * after calling btrfs_check_data_free_space + * Called if we need to clear a data reservation for this inode. 
*/ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) { @@ -3144,7 +3169,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) data_sinfo = BTRFS_I(inode)->space_info; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; - BTRFS_I(inode)->reserved_bytes -= bytes; spin_unlock(&data_sinfo->lock); } @@ -3165,6 +3189,7 @@ static int should_alloc_chunk(struct btrfs_root *root, struct btrfs_space_info *sinfo, u64 alloc_bytes, int force) { + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; @@ -3173,11 +3198,18 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; /* + * We need to take into account the global rsv because for all intents + * and purposes it's used space. Don't worry about locking the + * global_rsv, it doesn't change except when the transaction commits. + */ + num_allocated += global_rsv->size; + + /* * in limited mode, we want to have some free space up to * about 1% of the FS size. 
*/ if (force == CHUNK_ALLOC_LIMITED) { - thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = btrfs_super_total_bytes(root->fs_info->super_copy); thresh = max_t(u64, 64 * 1024 * 1024, div_factor_fine(thresh, 1)); @@ -3199,7 +3231,7 @@ static int should_alloc_chunk(struct btrfs_root *root, if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) return 0; - thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = btrfs_super_total_bytes(root->fs_info->super_copy); /* 256MB or 5% of the FS */ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); @@ -3302,24 +3334,26 @@ out: /* * shrink metadata reservation for delalloc */ -static int shrink_delalloc(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 to_reclaim, int sync) +static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; + struct btrfs_trans_handle *trans; u64 reserved; u64 max_reclaim; u64 reclaimed = 0; long time_left; - int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; unsigned long progress; + trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); - reserved = space_info->bytes_reserved; + reserved = space_info->bytes_may_use; progress = space_info->reservation_progress; if (reserved == 0) @@ -3334,7 +3368,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } max_reclaim = min(reserved, to_reclaim); - + nr_pages = max_t(unsigned long, nr_pages, + max_reclaim >> PAGE_CACHE_SHIFT); while (loops < 1024) { /* have the flusher threads jump in and do some IO */ smp_mb(); @@ -3343,9 +3378,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); spin_lock(&space_info->lock); - if 
(reserved > space_info->bytes_reserved) - reclaimed += reserved - space_info->bytes_reserved; - reserved = space_info->bytes_reserved; + if (reserved > space_info->bytes_may_use) + reclaimed += reserved - space_info->bytes_may_use; + reserved = space_info->bytes_may_use; spin_unlock(&space_info->lock); loops++; @@ -3356,11 +3391,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (trans && trans->transaction->blocked) return -EAGAIN; - time_left = schedule_timeout_interruptible(1); + if (wait_ordered && !trans) { + btrfs_wait_ordered_extents(root, 0, 0); + } else { + time_left = schedule_timeout_interruptible(1); - /* We were interrupted, exit */ - if (time_left) - break; + /* We were interrupted, exit */ + if (time_left) + break; + } /* we've kicked the IO a few times, if anything has been freed, * exit. There is no sense in looping here for a long time @@ -3375,34 +3414,90 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } } - if (reclaimed >= to_reclaim && !trans) - btrfs_wait_ordered_extents(root, 0, 0); + return reclaimed >= to_reclaim; } -/* - * Retries tells us how many times we've called reserve_metadata_bytes. The - * idea is if this is the first call (retries == 0) then we will add to our - * reserved count if we can't make the allocation in order to hold our place - * while we go and try and free up space. That way for retries > 1 we don't try - * and add space, we just check to see if the amount of unused space is >= the - * total space, meaning that our reservation is valid. +/** + * may_commit_transaction - possibly commit the transaction if it's ok to + * @root - the root we're allocating for + * @bytes - the number of bytes we want to reserve + * @force - force the commit * - * However if we don't intend to retry this reservation, pass -1 as retries so - * that it short circuits this logic. 
+ * This will check to make sure that committing the transaction will actually + * get us somewhere and then commit the transaction if it does. Otherwise it + * will return -ENOSPC. */ -static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int may_commit_transaction(struct btrfs_root *root, + struct btrfs_space_info *space_info, + u64 bytes, int force) +{ + struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv; + struct btrfs_trans_handle *trans; + + trans = (struct btrfs_trans_handle *)current->journal_info; + if (trans) + return -EAGAIN; + + if (force) + goto commit; + + /* See if there is enough pinned space to make this reservation */ + spin_lock(&space_info->lock); + if (space_info->bytes_pinned >= bytes) { + spin_unlock(&space_info->lock); + goto commit; + } + spin_unlock(&space_info->lock); + + /* + * See if there is some space in the delayed insertion reservation for + * this reservation. + */ + if (space_info != delayed_rsv->space_info) + return -ENOSPC; + + spin_lock(&delayed_rsv->lock); + if (delayed_rsv->size < bytes) { + spin_unlock(&delayed_rsv->lock); + return -ENOSPC; + } + spin_unlock(&delayed_rsv->lock); + +commit: + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return -ENOSPC; + + return btrfs_commit_transaction(trans, root); +} + +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @block_rsv - the block_rsv we're allocating for + * @orig_bytes - the number of bytes we want + * @flush - whether or not we can flush to make our reservation + * + * This will reserve orig_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. 
If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. + */ +static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 orig_bytes, int flush) { struct btrfs_space_info *space_info = block_rsv->space_info; - u64 unused; + u64 used; u64 num_bytes = orig_bytes; int retries = 0; int ret = 0; bool committed = false; bool flushing = false; + bool wait_ordered = false; again: ret = 0; @@ -3419,7 +3514,7 @@ again: * deadlock since we are waiting for the flusher to finish, but * hold the current transaction open. */ - if (trans) + if (current->journal_info) return -EAGAIN; ret = wait_event_interruptible(space_info->wait, !space_info->flush); @@ -3431,9 +3526,9 @@ again: } ret = -ENOSPC; - unused = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use; + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; /* * The idea here is that we've not already over-reserved the block group @@ -3442,10 +3537,9 @@ again: * lets start flushing stuff first and then come back and try to make * our reservation. */ - if (unused <= space_info->total_bytes) { - unused = space_info->total_bytes - unused; - if (unused >= num_bytes) { - space_info->bytes_reserved += orig_bytes; + if (used <= space_info->total_bytes) { + if (used + orig_bytes <= space_info->total_bytes) { + space_info->bytes_may_use += orig_bytes; ret = 0; } else { /* @@ -3461,10 +3555,64 @@ again: * amount plus the amount of bytes that we need for this * reservation. 
*/ - num_bytes = unused - space_info->total_bytes + + wait_ordered = true; + num_bytes = used - space_info->total_bytes + (orig_bytes * (retries + 1)); } + if (ret) { + u64 profile = btrfs_get_alloc_profile(root, 0); + u64 avail; + + /* + * If we have a lot of space that's pinned, don't bother doing + * the overcommit dance yet and just commit the transaction. + */ + avail = (space_info->total_bytes - space_info->bytes_used) * 8; + do_div(avail, 10); + if (space_info->bytes_pinned >= avail && flush && !committed) { + space_info->flush = 1; + flushing = true; + spin_unlock(&space_info->lock); + ret = may_commit_transaction(root, space_info, + orig_bytes, 1); + if (ret) + goto out; + committed = true; + goto again; + } + + spin_lock(&root->fs_info->free_chunk_lock); + avail = root->fs_info->free_chunk_space; + + /* + * If we have dup, raid1 or raid10 then only half of the free + * space is actually useable. + */ + if (profile & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + avail >>= 1; + + /* + * If we aren't flushing don't let us overcommit too much, say + * 1/8th of the space. If we can flush, let it overcommit up to + * 1/2 of the space. + */ + if (flush) + avail >>= 3; + else + avail >>= 1; + spin_unlock(&root->fs_info->free_chunk_lock); + + if (used + num_bytes < space_info->total_bytes + avail) { + space_info->bytes_may_use += orig_bytes; + ret = 0; + } else { + wait_ordered = true; + } + } + /* * Couldn't make our reservation, save our place so while we're trying * to reclaim space we can actually use it instead of somebody else @@ -3484,7 +3632,7 @@ again: * We do synchronous shrinking since we don't actually unreserve * metadata until after the IO is completed. */ - ret = shrink_delalloc(trans, root, num_bytes, 1); + ret = shrink_delalloc(root, num_bytes, wait_ordered); if (ret < 0) goto out; @@ -3496,35 +3644,17 @@ again: * so go back around and try again. 
*/ if (retries < 2) { + wait_ordered = true; retries++; goto again; } - /* - * Not enough space to be reclaimed, don't bother committing the - * transaction. - */ - spin_lock(&space_info->lock); - if (space_info->bytes_pinned < orig_bytes) - ret = -ENOSPC; - spin_unlock(&space_info->lock); - if (ret) - goto out; - - ret = -EAGAIN; - if (trans) - goto out; - ret = -ENOSPC; if (committed) goto out; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - goto out; - ret = btrfs_commit_transaction(trans, root); + ret = may_commit_transaction(root, space_info, orig_bytes, 0); if (!ret) { - trans = NULL; committed = true; goto again; } @@ -3542,10 +3672,12 @@ out: static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_rsv *block_rsv; - if (root->ref_cows) + struct btrfs_block_rsv *block_rsv = NULL; + + if (root->ref_cows || root == root->fs_info->csum_root) block_rsv = trans->block_rsv; - else + + if (!block_rsv) block_rsv = root->block_rsv; if (!block_rsv) @@ -3616,7 +3748,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, } if (num_bytes) { spin_lock(&space_info->lock); - space_info->bytes_reserved -= num_bytes; + space_info->bytes_may_use -= num_bytes; space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -3640,9 +3772,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) { memset(rsv, 0, sizeof(*rsv)); spin_lock_init(&rsv->lock); - atomic_set(&rsv->usage, 1); - rsv->priority = 6; - INIT_LIST_HEAD(&rsv->list); } struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) @@ -3663,38 +3792,38 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv) { - if (rsv && atomic_dec_and_test(&rsv->usage)) { - btrfs_block_rsv_release(root, rsv, (u64)-1); - if (!rsv->durable) - kfree(rsv); - } + btrfs_block_rsv_release(root, rsv, (u64)-1); + 
kfree(rsv); } -/* - * make the block_rsv struct be able to capture freed space. - * the captured space will re-add to the the block_rsv struct - * after transaction commit - */ -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv) +int btrfs_block_rsv_add(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes) { - block_rsv->durable = 1; - mutex_lock(&fs_info->durable_block_rsv_mutex); - list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); - mutex_unlock(&fs_info->durable_block_rsv_mutex); + int ret; + + if (num_bytes == 0) + return 0; + + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); + if (!ret) { + block_rsv_add_bytes(block_rsv, num_bytes, 1); + return 0; + } + + return ret; } -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 num_bytes) +int btrfs_block_rsv_add_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes) { int ret; if (num_bytes == 0) return 0; - ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 1); return 0; @@ -3703,55 +3832,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, return ret; } -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor) +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor) { u64 num_bytes = 0; - int commit_trans = 0; int ret = -ENOSPC; if (!block_rsv) return 0; spin_lock(&block_rsv->lock); - if (min_factor > 0) - num_bytes = div_factor(block_rsv->size, min_factor); - if (min_reserved > num_bytes) - num_bytes = min_reserved; + num_bytes = div_factor(block_rsv->size, min_factor); + if (block_rsv->reserved >= num_bytes) + 
ret = 0; + spin_unlock(&block_rsv->lock); - if (block_rsv->reserved >= num_bytes) { + return ret; +} + +int btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved) +{ + u64 num_bytes = 0; + int ret = -ENOSPC; + + if (!block_rsv) + return 0; + + spin_lock(&block_rsv->lock); + num_bytes = min_reserved; + if (block_rsv->reserved >= num_bytes) ret = 0; - } else { + else num_bytes -= block_rsv->reserved; - if (block_rsv->durable && - block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) - commit_trans = 1; - } spin_unlock(&block_rsv->lock); + if (!ret) return 0; - if (block_rsv->refill_used) { - ret = reserve_metadata_bytes(trans, root, block_rsv, - num_bytes, 0); - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 0); - return 0; - } - } - - if (commit_trans) { - if (trans) - return -EAGAIN; - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - ret = btrfs_commit_transaction(trans, root); + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); + if (!ret) { + block_rsv_add_bytes(block_rsv, num_bytes, 0); return 0; } - return -ENOSPC; + return ret; } int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, @@ -3783,7 +3909,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) u64 num_bytes; u64 meta_used; u64 data_used; - int csum_size = btrfs_super_csum_size(&fs_info->super_copy); + int csum_size = btrfs_super_csum_size(fs_info->super_copy); sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); spin_lock(&sinfo->lock); @@ -3827,12 +3953,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) if (sinfo->total_bytes > num_bytes) { num_bytes = sinfo->total_bytes - num_bytes; block_rsv->reserved += num_bytes; - sinfo->bytes_reserved += num_bytes; + sinfo->bytes_may_use += num_bytes; } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; - sinfo->bytes_reserved -= num_bytes; + sinfo->bytes_may_use -= 
num_bytes; sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -3848,16 +3974,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); fs_info->chunk_block_rsv.space_info = space_info; - fs_info->chunk_block_rsv.priority = 10; space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); fs_info->global_block_rsv.space_info = space_info; - fs_info->global_block_rsv.priority = 10; - fs_info->global_block_rsv.refill_used = 1; fs_info->delalloc_block_rsv.space_info = space_info; fs_info->trans_block_rsv.space_info = space_info; fs_info->empty_block_rsv.space_info = space_info; - fs_info->empty_block_rsv.priority = 10; + fs_info->delayed_block_rsv.space_info = space_info; fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; @@ -3865,10 +3988,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; - btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); - - btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); - update_global_block_rsv(fs_info); } @@ -3881,37 +4000,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); WARN_ON(fs_info->chunk_block_rsv.reserved > 0); -} - -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; - u64 num_bytes; - int ret; - - /* - * Truncate should be freeing data, but give us 2 items just in case it - * needs to use some space. We may want to be smarter about this in the - * future. 
- */ - num_bytes = btrfs_calc_trans_metadata_size(root, 2); - - /* We already have enough bytes, just return */ - if (rsv->reserved >= num_bytes) - return 0; - - num_bytes -= rsv->reserved; - - /* - * You should have reserved enough space before hand to do this, so this - * should not fail. - */ - ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); - BUG_ON(ret); - - return 0; + WARN_ON(fs_info->delayed_block_rsv.size > 0); + WARN_ON(fs_info->delayed_block_rsv.reserved > 0); } void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, @@ -3920,9 +4010,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, if (!trans->bytes_reserved) return; - BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); - btrfs_block_rsv_release(root, trans->block_rsv, - trans->bytes_reserved); + btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; } @@ -3964,11 +4052,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } +/** + * drop_outstanding_extent - drop an outstanding extent + * @inode: the inode we're dropping the extent for + * + * This is called when we are freeing up an outstanding extent, either called + * after an error or after an extent is written. This will return the number of + * reserved extents that need to be freed. This must be called with + * BTRFS_I(inode)->lock held. 
+ */ static unsigned drop_outstanding_extent(struct inode *inode) { unsigned dropped_extents = 0; - spin_lock(&BTRFS_I(inode)->lock); BUG_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; @@ -3978,19 +4074,70 @@ static unsigned drop_outstanding_extent(struct inode *inode) */ if (BTRFS_I(inode)->outstanding_extents >= BTRFS_I(inode)->reserved_extents) - goto out; + return 0; dropped_extents = BTRFS_I(inode)->reserved_extents - BTRFS_I(inode)->outstanding_extents; BTRFS_I(inode)->reserved_extents -= dropped_extents; -out: - spin_unlock(&BTRFS_I(inode)->lock); return dropped_extents; } -static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) +/** + * calc_csum_metadata_size - return the amount of metadata space that must be + * reserved/free'd for the given bytes. + * @inode: the inode we're manipulating + * @num_bytes: the number of bytes in question + * @reserve: 1 if we are reserving space, 0 if we are freeing space + * + * This adjusts the number of csum_bytes in the inode and then returns the + * correct amount of metadata that must either be reserved or freed. We + * calculate how many checksums we can fit into one leaf and then divide the + * number of bytes that will need to be checksummed by this value to figure out + * how many checksums will be required. If we are adding bytes then the number + * may go up and we will return the number of additional bytes that must be + * reserved. If it is going down we will return the number of bytes that must + * be freed. + * + * This must be called with BTRFS_I(inode)->lock held. 
+ */ +static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, + int reserve) { - return num_bytes >>= 3; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 csum_size; + int num_csums_per_leaf; + int num_csums; + int old_csums; + + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && + BTRFS_I(inode)->csum_bytes == 0) + return 0; + + old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + if (reserve) + BTRFS_I(inode)->csum_bytes += num_bytes; + else + BTRFS_I(inode)->csum_bytes -= num_bytes; + csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); + num_csums_per_leaf = (int)div64_u64(csum_size, + sizeof(struct btrfs_csum_item) + + sizeof(struct btrfs_disk_key)); + num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + num_csums = num_csums + num_csums_per_leaf - 1; + num_csums = num_csums / num_csums_per_leaf; + + old_csums = old_csums + num_csums_per_leaf - 1; + old_csums = old_csums / num_csums_per_leaf; + + /* No change, no need to reserve more */ + if (old_csums == num_csums) + return 0; + + if (reserve) + return btrfs_calc_trans_metadata_size(root, + num_csums - old_csums); + + return btrfs_calc_trans_metadata_size(root, old_csums - num_csums); } int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) @@ -3999,9 +4146,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; u64 to_reserve = 0; unsigned nr_extents = 0; + int flush = 1; int ret; - if (btrfs_transaction_in_commit(root->fs_info)) + if (btrfs_is_free_space_inode(root, inode)) + flush = 0; + + if (flush && btrfs_transaction_in_commit(root->fs_info)) schedule_timeout(1); num_bytes = ALIGN(num_bytes, root->sectorsize); @@ -4017,18 +4168,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); } + to_reserve += 
calc_csum_metadata_size(inode, num_bytes, 1); spin_unlock(&BTRFS_I(inode)->lock); - to_reserve += calc_csum_metadata_size(inode, num_bytes); - ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); + ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); if (ret) { + u64 to_free = 0; unsigned dropped; + + spin_lock(&BTRFS_I(inode)->lock); + dropped = drop_outstanding_extent(inode); + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&BTRFS_I(inode)->lock); + to_free += btrfs_calc_trans_metadata_size(root, dropped); + /* - * We don't need the return value since our reservation failed, - * we just need to clean up our counter. + * Somebody could have come in and twiddled with the + * reservation, so if we have to free more than we would have + * reserved from this reservation go ahead and release those + * bytes. */ - dropped = drop_outstanding_extent(inode); - WARN_ON(dropped > 1); + to_free -= to_reserve; + if (to_free) + btrfs_block_rsv_release(root, block_rsv, to_free); return ret; } @@ -4037,6 +4199,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) return 0; } +/** + * btrfs_delalloc_release_metadata - release a metadata reservation for an inode + * @inode: the inode to release the reservation for + * @num_bytes: the number of bytes we're releasing + * + * This will release the metadata reservation for an inode. This can be called + * once we complete IO for a given set of bytes to release their metadata + * reservations. 
+ */ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -4044,9 +4215,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) unsigned dropped; num_bytes = ALIGN(num_bytes, root->sectorsize); + spin_lock(&BTRFS_I(inode)->lock); dropped = drop_outstanding_extent(inode); - to_free = calc_csum_metadata_size(inode, num_bytes); + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&BTRFS_I(inode)->lock); if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); @@ -4054,6 +4227,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) to_free); } +/** + * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc + * @inode: inode we're writing to + * @num_bytes: the number of bytes we want to allocate + * + * This will do the following things + * + * o reserve space in the data space info for num_bytes + * o reserve space in the metadata space info based on number of outstanding + * extents and how much csums will be needed + * o add to the inodes ->delalloc_bytes + * o add it to the fs_info's delalloc inodes list. + * + * This will return 0 for success and -ENOSPC if there is no space left. + */ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) { int ret; @@ -4071,6 +4259,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) return 0; } +/** + * btrfs_delalloc_release_space - release data and metadata space for delalloc + * @inode: inode we're releasing space for + * @num_bytes: the number of bytes we want to free up + * + * This must be matched with a call to btrfs_delalloc_reserve_space. This is + * called in the case that we don't need the metadata AND data reservations + * anymore. So if there is an error or we insert an inline extent. 
+ * + * This function will release the metadata space that was not used and will + * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes + * list if there are no delalloc bytes left. + */ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) { btrfs_delalloc_release_metadata(inode, num_bytes); @@ -4090,12 +4291,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, /* block accounting for super block */ spin_lock(&info->delalloc_lock); - old_val = btrfs_super_bytes_used(&info->super_copy); + old_val = btrfs_super_bytes_used(info->super_copy); if (alloc) old_val += num_bytes; else old_val -= num_bytes; - btrfs_set_super_bytes_used(&info->super_copy, old_val); + btrfs_set_super_bytes_used(info->super_copy, old_val); spin_unlock(&info->delalloc_lock); while (total) { @@ -4123,7 +4324,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); - if (btrfs_super_cache_generation(&info->super_copy) != 0 && + if (btrfs_test_opt(root, SPACE_CACHE) && cache->disk_cache_state < BTRFS_DC_CLEAR) cache->disk_cache_state = BTRFS_DC_CLEAR; @@ -4135,7 +4336,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->reservation_progress++; cache->space_info->bytes_used += num_bytes; cache->space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); @@ -4187,7 +4387,6 @@ static int pin_down_extent(struct btrfs_root *root, if (reserved) { cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); @@ -4215,45 +4414,82 @@ int btrfs_pin_extent(struct btrfs_root *root, } /* - * update size of reserved extents. 
this function may return -EAGAIN - * if 'reserve' is true or 'sinfo' is false. + * this function must be called within transaction + */ +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes) +{ + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + + /* + * pull in the free space cache (if any) so that our pin + * removes the free space from the cache. We have load_only set + * to one because the slow code to read in the free extents does check + * the pinned extents. + */ + cache_block_group(cache, trans, root, 1); + + pin_down_extent(root, cache, bytenr, num_bytes, 0); + + /* remove us from the free space cache (if we're there at all) */ + btrfs_remove_free_space(cache, bytenr, num_bytes); + btrfs_put_block_group(cache); + return 0; +} + +/** + * btrfs_update_reserved_bytes - update the block_group and space info counters + * @cache: The cache we are manipulating + * @num_bytes: The number of bytes in question + * @reserve: One of the reservation enums + * + * This is called by the allocator when it reserves space, or by somebody who is + * freeing space that was never actually used on disk. For example if you + * reserve some space for a new leaf in transaction A and before transaction A + * commits you free that leaf, you call this with reserve set to 0 in order to + * clear the reservation. + * + * Metadata reservations should be called with RESERVE_ALLOC so we do the proper + * ENOSPC accounting. For data we handle the reservation through clearing the + * delalloc bits in the io_tree. We have to do this since we could end up + * allocating less disk space for the amount of data we have reserved in the + * case of compression. + * + * If this is a reservation and the block group has become read only we cannot + * make the reservation and return -EAGAIN, otherwise this function always + * succeeds. 
*/ -int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo) +static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve) { + struct btrfs_space_info *space_info = cache->space_info; int ret = 0; - if (sinfo) { - struct btrfs_space_info *space_info = cache->space_info; - spin_lock(&space_info->lock); - spin_lock(&cache->lock); - if (reserve) { - if (cache->ro) { - ret = -EAGAIN; - } else { - cache->reserved += num_bytes; - space_info->bytes_reserved += num_bytes; - } - } else { - if (cache->ro) - space_info->bytes_readonly += num_bytes; - cache->reserved -= num_bytes; - space_info->bytes_reserved -= num_bytes; - space_info->reservation_progress++; - } - spin_unlock(&cache->lock); - spin_unlock(&space_info->lock); - } else { - spin_lock(&cache->lock); + spin_lock(&space_info->lock); + spin_lock(&cache->lock); + if (reserve != RESERVE_FREE) { if (cache->ro) { ret = -EAGAIN; } else { - if (reserve) - cache->reserved += num_bytes; - else - cache->reserved -= num_bytes; + cache->reserved += num_bytes; + space_info->bytes_reserved += num_bytes; + if (reserve == RESERVE_ALLOC) { + BUG_ON(space_info->bytes_may_use < num_bytes); + space_info->bytes_may_use -= num_bytes; + } } - spin_unlock(&cache->lock); + } else { + if (cache->ro) + space_info->bytes_readonly += num_bytes; + cache->reserved -= num_bytes; + space_info->bytes_reserved -= num_bytes; + space_info->reservation_progress++; } + spin_unlock(&cache->lock); + spin_unlock(&space_info->lock); return ret; } @@ -4319,13 +4555,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) spin_lock(&cache->lock); cache->pinned -= len; cache->space_info->bytes_pinned -= len; - if (cache->ro) { + if (cache->ro) cache->space_info->bytes_readonly += len; - } else if (cache->reserved_pinned > 0) { - len = min(len, cache->reserved_pinned); - cache->reserved_pinned -= len; - cache->space_info->bytes_reserved 
+= len; - } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } @@ -4340,11 +4571,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *unpin; - struct btrfs_block_rsv *block_rsv; - struct btrfs_block_rsv *next_rsv; u64 start; u64 end; - int idx; int ret; if (fs_info->pinned_extents == &fs_info->freed_extents[0]) @@ -4367,30 +4595,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, cond_resched(); } - mutex_lock(&fs_info->durable_block_rsv_mutex); - list_for_each_entry_safe(block_rsv, next_rsv, - &fs_info->durable_block_rsv_list, list) { - - idx = trans->transid & 0x1; - if (block_rsv->freed[idx] > 0) { - block_rsv_add_bytes(block_rsv, - block_rsv->freed[idx], 0); - block_rsv->freed[idx] = 0; - } - if (atomic_read(&block_rsv->usage) == 0) { - btrfs_block_rsv_release(root, block_rsv, (u64)-1); - - if (block_rsv->freed[0] == 0 && - block_rsv->freed[1] == 0) { - list_del_init(&block_rsv->list); - kfree(block_rsv); - } - } else { - btrfs_block_rsv_release(root, block_rsv, 0); - } - } - mutex_unlock(&fs_info->durable_block_rsv_mutex); - return 0; } @@ -4668,7 +4872,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_block_rsv *block_rsv; struct btrfs_block_group_cache *cache = NULL; int ret; @@ -4683,64 +4886,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (!last_ref) return; - block_rsv = get_block_rsv(trans, root); cache = btrfs_lookup_block_group(root->fs_info, buf->start); - if (block_rsv->space_info != cache->space_info) - goto out; if (btrfs_header_generation(buf) == trans->transid) { if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, root, buf->start); if (!ret) - goto pin; + goto out; } if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { pin_down_extent(root, cache, buf->start, buf->len, 1); - 
goto pin; + goto out; } WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); - if (ret == -EAGAIN) { - /* block group became read-only */ - btrfs_update_reserved_bytes(cache, buf->len, 0, 1); - goto out; - } - - ret = 1; - spin_lock(&block_rsv->lock); - if (block_rsv->reserved < block_rsv->size) { - block_rsv->reserved += buf->len; - ret = 0; - } - spin_unlock(&block_rsv->lock); - - if (ret) { - spin_lock(&cache->space_info->lock); - cache->space_info->bytes_reserved -= buf->len; - cache->space_info->reservation_progress++; - spin_unlock(&cache->space_info->lock); - } - goto out; - } -pin: - if (block_rsv->durable && !cache->ro) { - ret = 0; - spin_lock(&cache->lock); - if (!cache->ro) { - cache->reserved_pinned += buf->len; - ret = 1; - } - spin_unlock(&cache->lock); - - if (ret) { - spin_lock(&block_rsv->lock); - block_rsv->freed[trans->transid & 0x1] += buf->len; - spin_unlock(&block_rsv->lock); - } + btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); } out: /* @@ -4883,10 +5046,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, int last_ptr_loop = 0; int loop = 0; int index = 0; + int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 
+ RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool found_uncached_bg = false; bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; + bool have_caching_bg = false; u64 ideal_cache_percent = 0; u64 ideal_cache_offset = 0; @@ -4969,6 +5135,7 @@ ideal_cache: } } search: + have_caching_bg = false; down_read(&space_info->groups_sem); list_for_each_entry(block_group, &space_info->block_groups[index], list) { @@ -5177,6 +5344,8 @@ refill_cluster: failed_alloc = true; goto have_block_group; } else if (!offset) { + if (!cached) + have_caching_bg = true; goto loop; } checks: @@ -5202,8 +5371,8 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, - (data & BTRFS_BLOCK_GROUP_DATA)); + ret = btrfs_update_reserved_bytes(block_group, num_bytes, + alloc_type); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -5227,6 +5396,9 @@ loop: } up_read(&space_info->groups_sem); + if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) + goto search; + if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) goto search; @@ -5325,7 +5497,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int index = 0; spin_lock(&info->lock); - printk(KERN_INFO "space_info has %llu free, is %sfull\n", + printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", + (unsigned long long)info->flags, (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved - info->bytes_readonly), @@ -5411,7 +5584,8 @@ again: return ret; } -int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) +static int __btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len, int pin) { struct btrfs_block_group_cache *cache; int ret = 0; @@ -5426,8 +5600,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) if (btrfs_test_opt(root, DISCARD)) ret = 
btrfs_discard_extent(root, start, len, NULL); - btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, 0, 1); + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + } btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -5435,6 +5613,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return ret; } +int btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 0); +} + +int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 1); +} + static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, @@ -5630,7 +5820,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, put_caching_control(caching_ctl); } - ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); + ret = btrfs_update_reserved_bytes(block_group, ins->offset, + RESERVE_ALLOC_NO_ACCOUNT); BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, @@ -5687,8 +5878,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); if (block_rsv->size == 0) { - ret = reserve_metadata_bytes(trans, root, block_rsv, - blocksize, 0); + ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); /* * If we couldn't reserve metadata bytes try and use some from * the global reserve. 
@@ -5708,13 +5898,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, if (!ret) return block_rsv; if (ret) { - WARN_ON(1); - ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, - 0); + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + /*DEFAULT_RATELIMIT_BURST*/ 2); + if (__ratelimit(&_rs)) { + printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); + WARN_ON(1); + } + ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); if (!ret) { - spin_lock(&block_rsv->lock); - block_rsv->size += blocksize; - spin_unlock(&block_rsv->lock); return block_rsv; } else if (ret && block_rsv != global_rsv) { ret = block_rsv_use_bytes(global_rsv, blocksize); @@ -6592,12 +6784,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) cache->bytes_super - btrfs_block_group_used(&cache->item); if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + - sinfo->bytes_may_use + sinfo->bytes_readonly + - cache->reserved_pinned + num_bytes + min_allocable_bytes <= - sinfo->total_bytes) { + sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + + min_allocable_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; - sinfo->bytes_reserved += cache->reserved_pinned; - cache->reserved_pinned = 0; cache->ro = 1; ret = 0; } @@ -6964,7 +7153,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) struct btrfs_space_info, list); if (space_info->bytes_pinned > 0 || - space_info->bytes_reserved > 0) { + space_info->bytes_reserved > 0 || + space_info->bytes_may_use > 0) { WARN_ON(1); dump_space_info(space_info, 0, 0); } @@ -7006,14 +7196,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) return -ENOMEM; path->reada = 1; - cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); - if (cache_gen != 0 && - btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) + cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); + if (btrfs_test_opt(root, SPACE_CACHE) && + 
btrfs_super_generation(root->fs_info->super_copy) != cache_gen) need_clear = 1; if (btrfs_test_opt(root, CLEAR_CACHE)) need_clear = 1; - if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) - printk(KERN_INFO "btrfs: disk space caching is enabled\n"); while (1) { ret = find_first_block_group(root, path, &key); @@ -7252,7 +7440,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; } - inode = lookup_free_space_inode(root, block_group, path); + inode = lookup_free_space_inode(tree_root, block_group, path); if (!IS_ERR(inode)) { ret = btrfs_orphan_add(trans, inode); BUG_ON(ret); @@ -7268,7 +7456,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->lock); } /* One for our lookup ref */ - iput(inode); + btrfs_add_delayed_iput(inode); } key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -7339,7 +7527,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) int mixed = 0; int ret; - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; if (!btrfs_super_root(disk_super)) return 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 624ef10d36c..1f87c4d0e7a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -895,6 +895,194 @@ search_again: goto again; } +/** + * convert_extent - convert all bits in a given range from one bit to another + * @tree: the io tree to search + * @start: the start offset in bytes + * @end: the end offset in bytes (inclusive) + * @bits: the bits to set in this range + * @clear_bits: the bits to clear in this range + * @mask: the allocation mask + * + * This will go through and set bits for the given range. If any states exist + * already in this range they are set with the given bit and cleared of the + * clear_bits. This is only meant to be used by things that are mergeable, ie + * converting from say DELALLOC to DIRTY. This is not meant to be used with + * boundary bits like LOCK. 
+ */ +int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int clear_bits, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err = 0; + u64 last_start; + u64 last_end; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + spin_lock(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(tree, start); + if (!node) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + return -ENOMEM; + err = insert_state(tree, prealloc, start, end, &bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); +hit_next: + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + struct rb_node *next_node; + + set_state_bits(tree, state, &bits); + clear_state_bit(tree, state, &clear_bits, 0); + + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + + start = last_end + 1; + next_node = rb_next(&state->rb_node); + if (next_node && start < end && prealloc && !need_resched()) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + return -ENOMEM; + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, &bits); + clear_state_bit(tree, state, &clear_bits, 0); + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start - 1; + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + return -ENOMEM; + + /* + * Avoid to free 'prealloc' if it can be merged with + * the later extent. + */ + err = insert_state(tree, prealloc, start, this_end, + &bits); + BUG_ON(err == -EEXIST); + if (err) { + free_extent_state(prealloc); + prealloc = NULL; + goto out; + } + prealloc = NULL; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) + return -ENOMEM; + + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, &bits); + clear_state_bit(tree, prealloc, &clear_bits, 0); + + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + spin_unlock(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} + /* wrappers around set/clear extent bit */ int 
set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) @@ -920,7 +1108,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, + EXTENT_DELALLOC | EXTENT_UPTODATE, 0, NULL, cached_state, mask); } @@ -2102,7 +2290,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (tree->ops && tree->ops->readpage_io_failed_hook) ret = tree->ops->readpage_io_failed_hook( bio, page, start, end, - failed_mirror, NULL); + failed_mirror, state); else ret = bio_readpage_error(bio, page, start, end, failed_mirror, NULL); @@ -2511,6 +2699,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, int compressed; int write_flags; unsigned long nr_written = 0; + bool fill_delalloc = true; if (wbc->sync_mode == WB_SYNC_ALL) write_flags = WRITE_SYNC; @@ -2520,6 +2709,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, trace___extent_writepage(page, inode, wbc); WARN_ON(!PageLocked(page)); + + ClearPageError(page); + pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || (page->index == end_index && !pg_offset)) { @@ -2541,10 +2733,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_page_extent_mapped(page); + if (!tree->ops || !tree->ops->fill_delalloc) + fill_delalloc = false; + delalloc_start = start; delalloc_end = 0; page_started = 0; - if (!epd->extent_locked) { + if (!epd->extent_locked && fill_delalloc) { u64 delalloc_to_write = 0; /* * make sure the wbc mapping index is at least updated @@ -2796,10 +2991,16 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - if (tree->ops && tree->ops->write_cache_pages_lock_hook) - tree->ops->write_cache_pages_lock_hook(page); - else - lock_page(page); + if (tree->ops && + tree->ops->write_cache_pages_lock_hook) 
{ + tree->ops->write_cache_pages_lock_hook(page, + data, flush_fn); + } else { + if (!trylock_page(page)) { + flush_fn(data); + lock_page(page); + } + } if (unlikely(page->mapping != mapping)) { unlock_page(page); @@ -3579,6 +3780,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } spin_unlock_irq(&page->mapping->tree_lock); + ClearPageError(page); unlock_page(page); } return 0; @@ -3724,8 +3926,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, - u64 start, int wait, + struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num) { unsigned long i; @@ -3761,7 +3962,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); - if (!wait) { + if (wait == WAIT_NONE) { if (!trylock_page(page)) goto unlock_exit; } else { @@ -3805,7 +4006,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (bio) submit_one_bio(READ, bio, mirror_num, bio_flags); - if (ret || !wait) + if (ret || wait != WAIT_COMPLETE) return ret; for (i = start_i; i < num_pages; i++) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a8e20b67292..feb9be0e23b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -17,7 +17,8 @@ #define EXTENT_NODATASUM (1 << 10) #define EXTENT_DO_ACCOUNTING (1 << 11) #define EXTENT_FIRST_DELALLOC (1 << 12) -#define EXTENT_DAMAGED (1 << 13) +#define EXTENT_NEED_WAIT (1 << 13) +#define EXTENT_DAMAGED (1 << 14) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) @@ -33,6 +34,7 @@ #define EXTENT_BUFFER_BLOCKING 1 #define EXTENT_BUFFER_DIRTY 2 #define EXTENT_BUFFER_CORRUPT 3 +#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ /* these are flags for 
extent_clear_unlock_delalloc */ #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 @@ -86,7 +88,8 @@ struct extent_io_ops { struct extent_state *other); void (*split_extent_hook)(struct inode *inode, struct extent_state *orig, u64 split); - int (*write_cache_pages_lock_hook)(struct page *page); + int (*write_cache_pages_lock_hook)(struct page *page, void *data, + void (*flush_fn)(void *)); }; struct extent_io_tree { @@ -215,6 +218,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int clear_bits, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, @@ -249,6 +254,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len); void free_extent_buffer(struct extent_buffer *eb); +#define WAIT_NONE 0 +#define WAIT_COMPLETE 1 +#define WAIT_PAGE_LOCK 2 int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a1cb7821bec..c7fb3a4247d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -91,8 +91,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct extent_buffer *leaf; u64 csum_offset = 0; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int csums_in_item; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; @@ -162,8 +161,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, u64 item_last_offset = 0; u64 
disk_bytenr; u32 diff; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int ret; struct btrfs_path *path; struct btrfs_csum_item *item = NULL; @@ -290,7 +288,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, int ret; size_t size; u64 csum_end; - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) @@ -492,8 +490,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, u64 bytenr, u64 len) { struct extent_buffer *leaf; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); u64 csum_end; u64 end_byte = bytenr + len; u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; @@ -549,8 +546,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 csum_end; struct extent_buffer *leaf; int ret; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int blocksize_bits = root->fs_info->sb->s_blocksize_bits; root = root->fs_info->csum_root; @@ -676,8 +672,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a381cd22f51..f2e92828960 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1036,11 +1036,13 @@ out: * on error we return an unlocked page and the error value * on success we return a locked page and 0 */ -static int prepare_uptodate_page(struct page *page, u64 pos) +static int prepare_uptodate_page(struct page *page, u64 pos, + bool 
force_uptodate) { int ret = 0; - if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { + if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && + !PageUptodate(page)) { ret = btrfs_readpage(NULL, page); if (ret) return ret; @@ -1061,12 +1063,13 @@ static int prepare_uptodate_page(struct page *page, u64 pos) static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, - size_t write_bytes) + size_t write_bytes, bool force_uptodate) { struct extent_state *cached_state = NULL; int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = fdentry(file)->d_inode; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int err = 0; int faili = 0; u64 start_pos; @@ -1078,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, again: for (i = 0; i < num_pages; i++) { pages[i] = find_or_create_page(inode->i_mapping, index + i, - GFP_NOFS); + mask); if (!pages[i]) { faili = i - 1; err = -ENOMEM; @@ -1086,10 +1089,11 @@ again: } if (i == 0) - err = prepare_uptodate_page(pages[i], pos); + err = prepare_uptodate_page(pages[i], pos, + force_uptodate); if (i == num_pages - 1) err = prepare_uptodate_page(pages[i], - pos + write_bytes); + pos + write_bytes, false); if (err) { page_cache_release(pages[i]); faili = i - 1; @@ -1158,6 +1162,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, size_t num_written = 0; int nrptrs; int ret = 0; + bool force_page_uptodate = false; nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / @@ -1200,7 +1205,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * contents of pages from loop to loop */ ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, write_bytes); + pos, first_index, write_bytes, + force_page_uptodate); if (ret) { btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); @@ 
-1217,12 +1223,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (copied < write_bytes) nrptrs = 1; - if (copied == 0) + if (copied == 0) { + force_page_uptodate = true; dirty_pages = 0; - else + } else { + force_page_uptodate = false; dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + } /* * If we had a short copy we need to release the excess delaloc @@ -1607,10 +1616,6 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } - ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); - if (ret) - goto out; - locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -1656,11 +1661,27 @@ static long btrfs_fallocate(struct file *file, int mode, if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + + /* + * Make sure we have enough space before we do the + * allocation. + */ + ret = btrfs_check_data_free_space(inode, last_byte - + cur_offset); + if (ret) { + free_extent_map(em); + break; + } + ret = btrfs_prealloc_file_range(inode, mode, cur_offset, last_byte - cur_offset, 1 << inode->i_blkbits, offset + len, &alloc_hint); + + /* Let go of our reservation. 
*/ + btrfs_free_reserved_data_space(inode, last_byte - + cur_offset); if (ret < 0) { free_extent_map(em); break; @@ -1686,8 +1707,6 @@ static long btrfs_fallocate(struct file *file, int mode, } unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state, GFP_NOFS); - - btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 41ac927401d..7a15fcfb3e1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -20,6 +20,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/math64.h> +#include <linux/ratelimit.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -84,6 +85,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, *block_group, struct btrfs_path *path) { struct inode *inode = NULL; + u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; spin_lock(&block_group->lock); if (block_group->inode) @@ -98,13 +100,14 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; spin_lock(&block_group->lock); - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { + if (!((BTRFS_I(inode)->flags & flags) == flags)) { printk(KERN_INFO "Old style space inode found, converting.\n"); - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | + BTRFS_INODE_NODATACOW; block_group->disk_cache_state = BTRFS_DC_CLEAR; } - if (!btrfs_fs_closing(root->fs_info)) { + if (!block_group->iref) { block_group->inode = igrab(inode); block_group->iref = 1; } @@ -122,12 +125,17 @@ int __create_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; + u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; int ret; ret = btrfs_insert_empty_inode(trans, root, path, ino); if (ret) return ret; + /* We inline 
crc's for the free disk space cache */ + if (ino != BTRFS_FREE_INO_OBJECTID) + flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; + leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -140,8 +148,7 @@ int __create_free_space_inode(struct btrfs_root *root, btrfs_set_inode_uid(leaf, inode_item, 0); btrfs_set_inode_gid(leaf, inode_item, 0); btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | - BTRFS_INODE_PREALLOC); + btrfs_set_inode_flags(leaf, inode_item, flags); btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_transid(leaf, inode_item, trans->transid); btrfs_set_inode_block_group(leaf, inode_item, offset); @@ -191,16 +198,24 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct inode *inode) { struct btrfs_block_rsv *rsv; + u64 needed_bytes; loff_t oldsize; int ret = 0; rsv = trans->block_rsv; - trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, - 0, 5); - if (ret) - return ret; + trans->block_rsv = &root->fs_info->global_block_rsv; + + /* 1 for slack space, 1 for updating the inode */ + needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + + btrfs_calc_trans_metadata_size(root, 1); + + spin_lock(&trans->block_rsv->lock); + if (trans->block_rsv->reserved < needed_bytes) { + spin_unlock(&trans->block_rsv->lock); + trans->block_rsv = rsv; + return -ENOSPC; + } + spin_unlock(&trans->block_rsv->lock); oldsize = i_size_read(inode); btrfs_i_size_write(inode, 0); @@ -213,13 +228,15 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); - trans->block_rsv = rsv; if (ret) { + trans->block_rsv = rsv; WARN_ON(1); return ret; } ret = btrfs_update_inode(trans, root, inode); + trans->block_rsv = rsv; + return ret; } @@ -242,26 +259,342 @@ static int readahead_cache(struct inode 
*inode) return 0; } +struct io_ctl { + void *cur, *orig; + struct page *page; + struct page **pages; + struct btrfs_root *root; + unsigned long size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, + struct btrfs_root *root) +{ + memset(io_ctl, 0, sizeof(struct io_ctl)); + io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, + GFP_NOFS); + if (!io_ctl->pages) + return -ENOMEM; + io_ctl->root = root; + if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) + io_ctl->check_crcs = 1; + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->pages); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + kunmap(io_ctl->page); + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + WARN_ON(io_ctl->cur); + BUG_ON(io_ctl->index >= io_ctl->num_pages); + io_ctl->page = io_ctl->pages[io_ctl->index++]; + io_ctl->cur = kmap(io_ctl->page); + io_ctl->orig = io_ctl->cur; + io_ctl->size = PAGE_CACHE_SIZE; + if (clear) + memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + int i; + + io_ctl_unmap_page(io_ctl); + + for (i = 0; i < io_ctl->num_pages; i++) { + ClearPageChecked(io_ctl->pages[i]); + unlock_page(io_ctl->pages[i]); + page_cache_release(io_ctl->pages[i]); + } +} + +static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, + int uptodate) +{ + struct page *page; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); + int i; + + for (i = 0; i < io_ctl->num_pages; i++) { + page = find_or_create_page(inode->i_mapping, i, mask); + if (!page) { + io_ctl_drop_pages(io_ctl); + return -ENOMEM; + } + io_ctl->pages[i] = page; + if (uptodate && !PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if 
(!PageUptodate(page)) { + printk(KERN_ERR "btrfs: error reading free " + "space cache\n"); + io_ctl_drop_pages(io_ctl); + return -EIO; + } + } + } + + return 0; +} + +static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) +{ + u64 *val; + + io_ctl_map_page(io_ctl, 1); + + /* + * Skip the csum areas. If we don't check crcs then we just have a + * 64bit chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); + io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + val = io_ctl->cur; + *val = cpu_to_le64(generation); + io_ctl->cur += sizeof(u64); +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + u64 *gen; + + /* + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk_ratelimited(KERN_ERR "btrfs: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_unmap_page(io_ctl); + return; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages;; + + crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + io_ctl_unmap_page(io_ctl); + tmp = kmap(io_ctl->pages[0]); + tmp += index; + *tmp = crc; + kunmap(io_ctl->pages[0]); +} + +static int 
io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); + return 0; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + tmp = kmap(io_ctl->pages[0]); + tmp += index; + val = *tmp; + kunmap(io_ctl->pages[0]); + + io_ctl_map_page(io_ctl, 0); + crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " + "space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, + void *bitmap) +{ + struct btrfs_free_space_entry *entry; + + if (!io_ctl->cur) + return -ENOSPC; + + entry = io_ctl->cur; + entry->offset = cpu_to_le64(offset); + entry->bytes = cpu_to_le64(bytes); + entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : + BTRFS_FREE_SPACE_EXTENT; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + + /* No more pages to map */ + if (io_ctl->index >= io_ctl->num_pages) + return 0; + + /* map the next page */ + io_ctl_map_page(io_ctl, 1); + return 0; +} + +static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) +{ + if (!io_ctl->cur) + return -ENOSPC; + + /* + * If we aren't at the start of the current page, unmap this one and + * map the next one if there is any left. 
+ */ + if (io_ctl->cur != io_ctl->orig) { + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index >= io_ctl->num_pages) + return -ENOSPC; + io_ctl_map_page(io_ctl, 0); + } + + memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index < io_ctl->num_pages) + io_ctl_map_page(io_ctl, 0); + return 0; +} + +static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) +{ + /* + * If we're not on the boundary we know we've modified the page and we + * need to crc the page. + */ + if (io_ctl->cur != io_ctl->orig) + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + else + io_ctl_unmap_page(io_ctl); + + while (io_ctl->index < io_ctl->num_pages) { + io_ctl_map_page(io_ctl, 1); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + } +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + if (io_ctl->index >= io_ctl->num_pages) + return 0; + + return io_ctl_check_crc(io_ctl, io_ctl->index); +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) +{ + int ret; + + if (io_ctl->cur && io_ctl->cur != io_ctl->orig) + io_ctl_unmap_page(io_ctl); + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); + io_ctl_unmap_page(io_ctl); + + return 0; +} + int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_path *path, u64 offset) { struct btrfs_free_space_header *header; struct extent_buffer *leaf; - struct page *page; + struct io_ctl 
io_ctl; struct btrfs_key key; + struct btrfs_free_space *e, *n; struct list_head bitmaps; u64 num_entries; u64 num_bitmaps; u64 generation; - pgoff_t index = 0; + u8 type; int ret = 0; INIT_LIST_HEAD(&bitmaps); /* Nothing in the space cache, goodbye */ if (!i_size_read(inode)) - goto out; + return 0; key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; @@ -269,11 +602,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - goto out; + return 0; else if (ret > 0) { btrfs_release_path(path); - ret = 0; - goto out; + return 0; } ret = -1; @@ -291,169 +623,100 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, " not match free space cache generation (%llu)\n", (unsigned long long)BTRFS_I(inode)->generation, (unsigned long long)generation); - goto out; + return 0; } if (!num_entries) - goto out; + return 0; + io_ctl_init(&io_ctl, inode, root); ret = readahead_cache(inode); if (ret) goto out; - while (1) { - struct btrfs_free_space_entry *entry; - struct btrfs_free_space *e; - void *addr; - unsigned long offset = 0; - int need_loop = 0; + ret = io_ctl_prepare_pages(&io_ctl, inode, 1); + if (ret) + goto out; - if (!num_entries && !num_bitmaps) - break; + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; + + while (num_entries) { + e = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); + if (!e) goto free_cache; - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - printk(KERN_ERR "btrfs: error reading free " - "space cache\n"); - goto free_cache; - } + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + kmem_cache_free(btrfs_free_space_cachep, e); + goto 
free_cache; } - addr = kmap(page); - if (index == 0) { - u64 *gen; + if (!e->bytes) { + kmem_cache_free(btrfs_free_space_cachep, e); + goto free_cache; + } - /* - * We put a bogus crc in the front of the first page in - * case old kernels try to mount a fs with the new - * format to make sure they discard the cache. - */ - addr += sizeof(u64); - offset += sizeof(u64); - - gen = addr; - if (*gen != BTRFS_I(inode)->generation) { - printk(KERN_ERR "btrfs: space cache generation" - " (%llu) does not match inode (%llu)\n", - (unsigned long long)*gen, - (unsigned long long) - BTRFS_I(inode)->generation); - kunmap(page); - unlock_page(page); - page_cache_release(page); + if (type == BTRFS_FREE_SPACE_EXTENT) { + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + spin_unlock(&ctl->tree_lock); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } - addr += sizeof(u64); - offset += sizeof(u64); - } - entry = addr; - - while (1) { - if (!num_entries) - break; - - need_loop = 1; - e = kmem_cache_zalloc(btrfs_free_space_cachep, - GFP_NOFS); - if (!e) { - kunmap(page); - unlock_page(page); - page_cache_release(page); + } else { + BUG_ON(!num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!e->bitmap) { + kmem_cache_free( + btrfs_free_space_cachep, e); goto free_cache; } - - e->offset = le64_to_cpu(entry->offset); - e->bytes = le64_to_cpu(entry->bytes); - if (!e->bytes) { - kunmap(page); + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + ctl->op->recalc_thresholds(ctl); + spin_unlock(&ctl->tree_lock); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); kmem_cache_free(btrfs_free_space_cachep, e); - unlock_page(page); - page_cache_release(page); goto free_cache; } - - if (entry->type == BTRFS_FREE_SPACE_EXTENT) { - spin_lock(&ctl->tree_lock); - ret = 
link_free_space(ctl, e); - spin_unlock(&ctl->tree_lock); - if (ret) { - printk(KERN_ERR "Duplicate entries in " - "free space cache, dumping\n"); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - } else { - e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!e->bitmap) { - kunmap(page); - kmem_cache_free( - btrfs_free_space_cachep, e); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - spin_lock(&ctl->tree_lock); - ret = link_free_space(ctl, e); - ctl->total_bitmaps++; - ctl->op->recalc_thresholds(ctl); - spin_unlock(&ctl->tree_lock); - if (ret) { - printk(KERN_ERR "Duplicate entries in " - "free space cache, dumping\n"); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - list_add_tail(&e->list, &bitmaps); - } - - num_entries--; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - break; - entry++; + list_add_tail(&e->list, &bitmaps); } - /* - * We read an entry out of this page, we need to move on to the - * next page. - */ - if (need_loop) { - kunmap(page); - goto next; - } + num_entries--; + } - /* - * We add the bitmaps at the end of the entries in order that - * the bitmap entries are added to the cache. - */ - e = list_entry(bitmaps.next, struct btrfs_free_space, list); + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are added to the cache. 
+ */ + list_for_each_entry_safe(e, n, &bitmaps, list) { list_del_init(&e->list); - memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); - kunmap(page); - num_bitmaps--; -next: - unlock_page(page); - page_cache_release(page); - index++; + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; } + io_ctl_drop_pages(&io_ctl); ret = 1; out: + io_ctl_free(&io_ctl); return ret; free_cache: + io_ctl_drop_pages(&io_ctl); __btrfs_remove_free_space_cache(ctl); goto out; } @@ -465,7 +728,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root = fs_info->tree_root; struct inode *inode; struct btrfs_path *path; - int ret; + int ret = 0; bool matched; u64 used = btrfs_block_group_used(&block_group->item); @@ -497,6 +760,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, return 0; } + /* We may have converted the inode and made the cache invalid. */ + spin_lock(&block_group->lock); + if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { + spin_unlock(&block_group->lock); + goto out; + } + spin_unlock(&block_group->lock); + ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, path, block_group->key.objectid); btrfs_free_path(path); @@ -530,6 +801,19 @@ out: return ret; } +/** + * __btrfs_write_out_cache - write out cached info to an inode + * @root - the root the inode belongs to + * @ctl - the free space cache we are going to write out + * @block_group - the block_group for this cache if it belongs to a block_group + * @trans - the trans handle + * @path - the path to use + * @offset - the offset for the key we'll insert + * + * This function writes out a free space cache struct to disk for quick recovery + * on mount. This will return 0 if it was successfull in writing the cache out, + * and -1 if it was not. 
+ */ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_block_group_cache *block_group, @@ -540,42 +824,24 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct extent_buffer *leaf; struct rb_node *node; struct list_head *pos, *n; - struct page **pages; - struct page *page; struct extent_state *cached_state = NULL; struct btrfs_free_cluster *cluster = NULL; struct extent_io_tree *unpin = NULL; + struct io_ctl io_ctl; struct list_head bitmap_list; struct btrfs_key key; u64 start, end, len; - u64 bytes = 0; - u32 crc = ~(u32)0; - int index = 0, num_pages = 0; int entries = 0; int bitmaps = 0; - int ret = -1; - bool next_page = false; - bool out_of_space = false; + int ret; + int err = -1; INIT_LIST_HEAD(&bitmap_list); - node = rb_first(&ctl->free_space_offset); - if (!node) - return 0; - if (!i_size_read(inode)) return -1; - num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - - filemap_write_and_wait(inode->i_mapping); - btrfs_wait_ordered_range(inode, inode->i_size & - ~(root->sectorsize - 1), (u64)-1); - - pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); - if (!pages) - return -1; + io_ctl_init(&io_ctl, inode, root); /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) @@ -589,30 +855,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, */ unpin = root->fs_info->pinned_extents; - /* - * Lock all pages first so we can lock the extent safely. - * - * NOTE: Because we hold the ref the entire time we're going to write to - * the page find_get_page should never fail, so we don't do a check - * after find_get_page at this point. Just putting this here so people - * know and don't freak out. 
- */ - while (index < num_pages) { - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - int i; - - for (i = 0; i < num_pages; i++) { - unlock_page(pages[i]); - page_cache_release(pages[i]); - } - goto out; - } - pages[index] = page; - index++; - } + /* Lock all pages first so we can lock the extent safely. */ + io_ctl_prepare_pages(&io_ctl, inode, 0); - index = 0; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state, GFP_NOFS); @@ -623,189 +868,111 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (block_group) start = block_group->key.objectid; - /* Write out the extent entries */ - do { - struct btrfs_free_space_entry *entry; - void *addr, *orig; - unsigned long offset = 0; + node = rb_first(&ctl->free_space_offset); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; + } - next_page = false; + /* Make sure we can fit our crcs into the first page */ + if (io_ctl.check_crcs && + (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { + WARN_ON(1); + goto out_nospc; + } - if (index >= num_pages) { - out_of_space = true; - break; - } + io_ctl_set_generation(&io_ctl, trans->transid); - page = pages[index]; + /* Write out the extent entries */ + while (node) { + struct btrfs_free_space *e; - orig = addr = kmap(page); - if (index == 0) { - u64 *gen; + e = rb_entry(node, struct btrfs_free_space, offset_index); + entries++; - /* - * We're going to put in a bogus crc for this page to - * make sure that old kernels who aren't aware of this - * format will be sure to discard the cache. 
- */ - addr += sizeof(u64); - offset += sizeof(u64); + ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, + e->bitmap); + if (ret) + goto out_nospc; - gen = addr; - *gen = trans->transid; - addr += sizeof(u64); - offset += sizeof(u64); + if (e->bitmap) { + list_add_tail(&e->list, &bitmap_list); + bitmaps++; } - entry = addr; - - memset(addr, 0, PAGE_CACHE_SIZE - offset); - while (node && !next_page) { - struct btrfs_free_space *e; - - e = rb_entry(node, struct btrfs_free_space, offset_index); - entries++; - - entry->offset = cpu_to_le64(e->offset); - entry->bytes = cpu_to_le64(e->bytes); - if (e->bitmap) { - entry->type = BTRFS_FREE_SPACE_BITMAP; - list_add_tail(&e->list, &bitmap_list); - bitmaps++; - } else { - entry->type = BTRFS_FREE_SPACE_EXTENT; - } - node = rb_next(node); - if (!node && cluster) { - node = rb_first(&cluster->root); - cluster = NULL; - } - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - next_page = true; - entry++; + node = rb_next(node); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; } + } - /* - * We want to add any pinned extents to our free space cache - * so we don't leak the space - */ - while (block_group && !next_page && - (start < block_group->key.objectid + - block_group->key.offset)) { - ret = find_first_extent_bit(unpin, start, &start, &end, - EXTENT_DIRTY); - if (ret) { - ret = 0; - break; - } - - /* This pinned extent is out of our range */ - if (start >= block_group->key.objectid + - block_group->key.offset) - break; - - len = block_group->key.objectid + - block_group->key.offset - start; - len = min(len, end + 1 - start); - - entries++; - entry->offset = cpu_to_le64(start); - entry->bytes = cpu_to_le64(len); - entry->type = BTRFS_FREE_SPACE_EXTENT; - - start = end + 1; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - next_page = true; - 
entry++; + /* + * We want to add any pinned extents to our free space cache + * so we don't leak the space + */ + while (block_group && (start < block_group->key.objectid + + block_group->key.offset)) { + ret = find_first_extent_bit(unpin, start, &start, &end, + EXTENT_DIRTY); + if (ret) { + ret = 0; + break; } - /* Generate bogus crc value */ - if (index == 0) { - u32 *tmp; - crc = btrfs_csum_data(root, orig + sizeof(u64), crc, - PAGE_CACHE_SIZE - sizeof(u64)); - btrfs_csum_final(crc, (char *)&crc); - crc++; - tmp = orig; - *tmp = crc; - } + /* This pinned extent is out of our range */ + if (start >= block_group->key.objectid + + block_group->key.offset) + break; - kunmap(page); + len = block_group->key.objectid + + block_group->key.offset - start; + len = min(len, end + 1 - start); - bytes += PAGE_CACHE_SIZE; + entries++; + ret = io_ctl_add_entry(&io_ctl, start, len, NULL); + if (ret) + goto out_nospc; - index++; - } while (node || next_page); + start = end + 1; + } /* Write out the bitmaps */ list_for_each_safe(pos, n, &bitmap_list) { - void *addr; struct btrfs_free_space *entry = list_entry(pos, struct btrfs_free_space, list); - if (index >= num_pages) { - out_of_space = true; - break; - } - page = pages[index]; - - addr = kmap(page); - memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); - kunmap(page); - bytes += PAGE_CACHE_SIZE; - + ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); + if (ret) + goto out_nospc; list_del_init(&entry->list); - index++; - } - - if (out_of_space) { - btrfs_drop_pages(pages, num_pages); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, - GFP_NOFS); - ret = 0; - goto out; } /* Zero out the rest of the pages just to make sure */ - while (index < num_pages) { - void *addr; - - page = pages[index]; - addr = kmap(page); - memset(addr, 0, PAGE_CACHE_SIZE); - kunmap(page); - bytes += PAGE_CACHE_SIZE; - index++; - } + io_ctl_zero_remaining_pages(&io_ctl); - ret = btrfs_dirty_pages(root, inode, pages, 
num_pages, 0, - bytes, &cached_state); - btrfs_drop_pages(pages, num_pages); + ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, + 0, i_size_read(inode), &cached_state); + io_ctl_drop_pages(&io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state, GFP_NOFS); - if (ret) { - ret = 0; + if (ret) goto out; - } - BTRFS_I(inode)->generation = trans->transid; - filemap_write_and_wait(inode->i_mapping); + ret = filemap_write_and_wait(inode->i_mapping); + if (ret) + goto out; key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; key.type = 0; - ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { - ret = -1; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, + GFP_NOFS); goto out; } leaf = path->nodes[0]; @@ -816,15 +983,16 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || found_key.offset != offset) { - ret = -1; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, - GFP_NOFS); + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, + inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + NULL, GFP_NOFS); btrfs_release_path(path); goto out; } } + + BTRFS_I(inode)->generation = trans->transid; header = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_header); btrfs_set_free_space_entries(leaf, header, entries); @@ -833,16 +1001,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - ret = 1; - + err = 0; out: - kfree(pages); - if 
(ret != 1) { - invalidate_inode_pages2_range(inode->i_mapping, 0, index); + io_ctl_free(&io_ctl); + if (err) { + invalidate_inode_pages2(inode->i_mapping); BTRFS_I(inode)->generation = 0; } btrfs_update_inode(trans, root, inode); - return ret; + return err; + +out_nospc: + list_for_each_safe(pos, n, &bitmap_list) { + struct btrfs_free_space *entry = + list_entry(pos, struct btrfs_free_space, list); + list_del_init(&entry->list); + } + io_ctl_drop_pages(&io_ctl); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, GFP_NOFS); + goto out; } int btrfs_write_out_cache(struct btrfs_root *root, @@ -869,14 +1047,15 @@ int btrfs_write_out_cache(struct btrfs_root *root, ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, path, block_group->key.objectid); - if (ret < 0) { + if (ret) { spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&block_group->lock); ret = 0; - +#ifdef DEBUG printk(KERN_ERR "btrfs: failed to write free space cace " "for block group %llu\n", block_group->key.objectid); +#endif } iput(inode); @@ -1701,6 +1880,7 @@ again: ctl->total_bitmaps--; } kmem_cache_free(btrfs_free_space_cachep, info); + ret = 0; goto out_lock; } @@ -1708,7 +1888,8 @@ again: unlink_free_space(ctl, info); info->offset += bytes; info->bytes -= bytes; - link_free_space(ctl, info); + ret = link_free_space(ctl, info); + WARN_ON(ret); goto out_lock; } @@ -2472,9 +2653,19 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, spin_unlock(&ctl->tree_lock); if (bytes >= minlen) { - int update_ret; - update_ret = btrfs_update_reserved_bytes(block_group, - bytes, 1, 1); + struct btrfs_space_info *space_info; + int update = 0; + + space_info = block_group->space_info; + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (!block_group->ro) { + block_group->reserved += bytes; + space_info->bytes_reserved += bytes; + update = 1; + } + spin_unlock(&block_group->lock); 
+ spin_unlock(&space_info->lock); ret = btrfs_error_discard_extent(fs_info->extent_root, start, @@ -2482,9 +2673,16 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, &actually_trimmed); btrfs_add_free_space(block_group, start, bytes); - if (!update_ret) - btrfs_update_reserved_bytes(block_group, - bytes, 0, 1); + if (update) { + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (block_group->ro) + space_info->bytes_readonly += bytes; + block_group->reserved -= bytes; + space_info->bytes_reserved -= bytes; + spin_unlock(&space_info->lock); + spin_unlock(&block_group->lock); + } if (ret) break; @@ -2643,9 +2841,13 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, return 0; ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); - if (ret < 0) + if (ret) { + btrfs_delalloc_release_metadata(inode, inode->i_size); +#ifdef DEBUG printk(KERN_ERR "btrfs: failed to write free ino cache " "for root %llu\n", root->root_key.objectid); +#endif + } iput(inode); return ret; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b4087e0fa87..53dcbdf446c 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -465,14 +465,16 @@ again: /* Just to make sure we have enough space */ prealloc += 8 * PAGE_CACHE_SIZE; - ret = btrfs_check_data_free_space(inode, prealloc); + ret = btrfs_delalloc_reserve_space(inode, prealloc); if (ret) goto out_put; ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); - if (ret) + if (ret) { + btrfs_delalloc_release_space(inode, prealloc); goto out_put; + } btrfs_free_reserved_data_space(inode, prealloc); out_put: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9327f45434e..9d0eaa57d4e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -393,7 +393,10 @@ again: (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - BUG_ON(!pages); + if (!pages) 
{ + /* just bail out to the uncompressed code */ + goto cont; + } if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; @@ -424,6 +427,7 @@ again: will_compress = 1; } } +cont: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -820,7 +824,7 @@ static noinline int cow_file_range(struct inode *inode, } BUG_ON(disk_num_bytes > - btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_super_total_bytes(root->fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); @@ -1792,12 +1796,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) } ret = 0; out: - if (nolock) { - if (trans) - btrfs_end_transaction_nolock(trans, root); - } else { + if (root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, ordered_extent->len); - if (trans) + if (trans) { + if (nolock) + btrfs_end_transaction_nolock(trans, root); + else btrfs_end_transaction(trans, root); } @@ -1931,89 +1935,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) up_read(&root->fs_info->cleanup_work_sem); } -/* - * calculate extra metadata reservation when snapshotting a subvolume - * contains orphan files. - */ -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve) -{ - struct btrfs_root *root; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - - root = pending->root; - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - block_rsv = root->orphan_block_rsv; - - /* orphan block reservation for the snapshot */ - num_bytes = block_rsv->size; - - /* - * after the snapshot is created, COWing tree blocks may use more - * space than it frees. So we should make sure there is enough - * reserved space. 
- */ - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes += block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - } - - *bytes_to_reserve += num_bytes; -} - -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending) -{ - struct btrfs_root *root = pending->root; - struct btrfs_root *snap = pending->snap; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - int ret; - - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - /* refill source subvolume's orphan block reservation */ - block_rsv = root->orphan_block_rsv; - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes = block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - root->orphan_block_rsv, - num_bytes); - BUG_ON(ret); - } - - /* setup orphan block reservation for the snapshot */ - block_rsv = btrfs_alloc_block_rsv(snap); - BUG_ON(!block_rsv); - - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - snap->orphan_block_rsv = block_rsv; - - num_bytes = root->orphan_block_rsv->size; - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - block_rsv, num_bytes); - BUG_ON(ret); - -#if 0 - /* insert orphan item for the snapshot */ - WARN_ON(!root->orphan_item_inserted); - ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, - snap->root_key.objectid); - BUG_ON(ret); - snap->orphan_item_inserted = 1; -#endif -} - enum btrfs_orphan_cleanup_state { ORPHAN_CLEANUP_STARTED = 1, ORPHAN_CLEANUP_DONE = 2, @@ -2099,9 +2020,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } spin_unlock(&root->orphan_lock); - if (block_rsv) - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - /* grab metadata reservation from transaction handle */ if (reserve) { ret = 
btrfs_orphan_reserve_metadata(trans, inode); @@ -2168,6 +2086,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) struct btrfs_key key, found_key; struct btrfs_trans_handle *trans; struct inode *inode; + u64 last_objectid = 0; int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) @@ -2219,41 +2138,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * crossing root thing. we store the inode number in the * offset of the orphan item. */ + + if (found_key.offset == last_objectid) { + printk(KERN_ERR "btrfs: Error removing orphan entry, " + "stopping orphan cleanup\n"); + ret = -EINVAL; + goto out; + } + + last_objectid = found_key.offset; + found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + ret = PTR_RET(inode); + if (ret && ret != -ESTALE) goto out; - } /* - * add this inode to the orphan list so btrfs_orphan_del does - * the proper thing when we hit it - */ - spin_lock(&root->orphan_lock); - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); - - /* - * if this is a bad inode, means we actually succeeded in - * removing the inode, but not the orphan record, which means - * we need to manually delete the orphan since iput will just - * do a destroy_inode + * Inode is already gone but the orphan item is still there, + * kill the orphan item. 
*/ - if (is_bad_inode(inode)) { - trans = btrfs_start_transaction(root, 0); + if (ret == -ESTALE) { + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } - btrfs_orphan_del(trans, inode); + ret = btrfs_del_orphan_item(trans, root, + found_key.objectid); + BUG_ON(ret); btrfs_end_transaction(trans, root); - iput(inode); continue; } + /* + * add this inode to the orphan list so btrfs_orphan_del does + * the proper thing when we hit it + */ + spin_lock(&root->orphan_lock); + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + spin_unlock(&root->orphan_lock); + /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { if (!S_ISREG(inode->i_mode)) { @@ -2687,7 +2614,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, u64 ino = btrfs_ino(inode); u64 dir_ino = btrfs_ino(dir); - trans = btrfs_start_transaction(root, 10); + /* + * 1 for the possible orphan item + * 1 for the dir item + * 1 for the dir index + * 1 for the inode ref + * 1 for the inode ref in the tree log + * 2 for the dir entries in the log + * 1 for the inode + */ + trans = btrfs_start_transaction(root, 8); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -2710,7 +2646,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, return ERR_PTR(-ENOMEM); } - trans = btrfs_start_transaction(root, 0); + /* 1 for the orphan item */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); root->fs_info->enospc_unlink = 0; @@ -2815,6 +2752,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = 0; out: btrfs_free_path(path); + /* Migrate the orphan reservation over */ + if (!err) + err = btrfs_block_rsv_migrate(trans->block_rsv, + &root->fs_info->global_block_rsv, + trans->bytes_reserved); + if (err) { btrfs_end_transaction(trans, root); root->fs_info->enospc_unlink = 0; @@ -2829,6 +2772,9 @@ static void 
__unlink_end_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (trans->block_rsv == &root->fs_info->global_block_rsv) { + btrfs_block_rsv_release(root, trans->block_rsv, + trans->bytes_reserved); + trans->block_rsv = &root->fs_info->trans_block_rsv; BUG_ON(!root->fs_info->enospc_unlink); root->fs_info->enospc_unlink = 0; } @@ -3220,6 +3166,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; + gfp_t mask = btrfs_alloc_write_mask(mapping); int ret = 0; u64 page_start; u64 page_end; @@ -3232,7 +3179,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ret = -ENOMEM; again: - page = find_or_create_page(mapping, index, GFP_NOFS); + page = find_or_create_page(mapping, index, mask); if (!page) { btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); goto out; @@ -3465,6 +3412,8 @@ void btrfs_evict_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv, *global_rsv; + u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); unsigned long nr; int ret; @@ -3492,22 +3441,55 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) { + btrfs_orphan_del(NULL, inode); + goto no_delete; + } + rsv->size = min_size; + global_rsv = &root->fs_info->global_block_rsv; + btrfs_i_size_write(inode, 0); + /* + * This is a bit simpler than btrfs_truncate since + * + * 1) We've already reserved our space for our orphan item in the + * unlink. + * 2) We're going to delete the inode item, so we don't need to update + * it at all. + * + * So we just need to reserve some slack space in case we add bytes when + * doing the truncate. 
+ */ while (1) { - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - trans->block_rsv = root->orphan_block_rsv; + ret = btrfs_block_rsv_refill(root, rsv, min_size); + + /* + * Try and steal from the global reserve since we will + * likely not use this space anyway, we want to try as + * hard as possible to get this to work. + */ + if (ret) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); if (ret) { - BUG_ON(ret != -EAGAIN); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - continue; + printk(KERN_WARNING "Could not get space for a " + "delete, will truncate on mount %d\n", ret); + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; + } + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); if (ret != -EAGAIN) break; @@ -3516,14 +3498,17 @@ void btrfs_evict_inode(struct inode *inode) btrfs_end_transaction(trans, root); trans = NULL; btrfs_btree_balance_dirty(root, nr); - } + btrfs_free_block_rsv(root, rsv); + if (ret == 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); BUG_ON(ret); } + trans->block_rsv = &root->fs_info->trans_block_rsv; if (!(root == root->fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) btrfs_return_ino(root, btrfs_ino(inode)); @@ -5647,8 +5632,7 @@ again: if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { ret = btrfs_ordered_update_i_size(inode, 0, ordered); if (!ret) - ret = btrfs_update_inode(trans, root, inode); - err = ret; + err = btrfs_update_inode(trans, root, inode); goto out; } @@ -6393,6 +6377,7 @@ static int btrfs_truncate(struct inode *inode) struct btrfs_trans_handle *trans; unsigned long nr; u64 mask = root->sectorsize - 1; + u64 
min_size = btrfs_calc_trunc_metadata_size(root, 1); ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) @@ -6440,19 +6425,23 @@ static int btrfs_truncate(struct inode *inode) rsv = btrfs_alloc_block_rsv(root); if (!rsv) return -ENOMEM; - btrfs_add_durable_block_rsv(root->fs_info, rsv); + rsv->size = min_size; + /* + * 1 for the truncate slack space + * 1 for the orphan item we're going to add + * 1 for the orphan item deletion + * 1 for updating the inode. + */ trans = btrfs_start_transaction(root, 4); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; } - /* - * Reserve space for the truncate process. Truncate should be adding - * space, but if there are snapshots it may end up using space. - */ - ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + /* Migrate the slack space for the truncate to our reserve */ + ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, + min_size); BUG_ON(ret); ret = btrfs_orphan_add(trans, inode); @@ -6461,21 +6450,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - - /* - * Ok so we've already migrated our bytes over for the truncate, so here - * just reserve the one slot we need for updating the inode. - */ - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out; - } - trans->block_rsv = rsv; - /* * setattr is responsible for setting the ordered_data_close flag, * but that is only tested during the last file release. That @@ -6497,20 +6471,30 @@ static int btrfs_truncate(struct inode *inode) btrfs_add_ordered_operation(trans, root, inode); while (1) { + ret = btrfs_block_rsv_refill(root, rsv, min_size); + if (ret) { + /* + * This can only happen with the original transaction we + * started above, every other time we shouldn't have a + * transaction started yet. 
+ */ + if (ret == -EAGAIN) + goto end_trans; + err = ret; + break; + } + if (!trans) { - trans = btrfs_start_transaction(root, 3); + /* Just need the 1 for updating the inode */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; } - - ret = btrfs_truncate_reserve_metadata(trans, root, - rsv); - BUG_ON(ret); - - trans->block_rsv = rsv; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); @@ -6525,7 +6509,7 @@ static int btrfs_truncate(struct inode *inode) err = ret; break; } - +end_trans: nr = trans->blocks_used; btrfs_end_transaction(trans, root); trans = NULL; @@ -6607,9 +6591,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; - ei->reserved_bytes = 0; ei->disk_i_size = 0; ei->flags = 0; + ei->csum_bytes = 0; ei->index_cnt = (u64)-1; ei->last_unlink_trans = 0; @@ -6655,6 +6639,8 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(inode->i_data.nrpages); WARN_ON(BTRFS_I(inode)->outstanding_extents); WARN_ON(BTRFS_I(inode)->reserved_extents); + WARN_ON(BTRFS_I(inode)->delalloc_bytes); + WARN_ON(BTRFS_I(inode)->csum_bytes); /* * This can happen where we create an inode, but somebody else also diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7f57efa76d1..cc989399034 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -118,7 +118,7 @@ void btrfs_update_iflags(struct inode *inode) /* * Inherit flags from the parent inode. * - * Unlike extN we don't have any flags we don't want to inherit currently. + * Currently only the compression flags and the cow flags are inherited. 
*/ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) { @@ -129,12 +129,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) flags = BTRFS_I(dir)->flags; - if (S_ISREG(inode->i_mode)) - flags &= ~BTRFS_INODE_DIRSYNC; - else if (!S_ISDIR(inode->i_mode)) - flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); + if (flags & BTRFS_INODE_NOCOMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + } else if (flags & BTRFS_INODE_COMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; + } + + if (flags & BTRFS_INODE_NODATACOW) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - BTRFS_I(inode)->flags = flags; btrfs_update_iflags(inode); } @@ -278,6 +283,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; + u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -296,12 +302,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) } } rcu_read_unlock(); + if (!num_devices) return -EOPNOTSUPP; - if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; + if (range.start > total_bytes) + return -EINVAL; + range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(root, &range); if (ret < 0) @@ -761,7 +770,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, int ret = 1; /* - * make sure that once we start defragging and extent, we keep on + * make sure that once we start defragging an extent, we keep on * defragging it */ if (start < *defrag_end) @@ -806,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, * extent will force at least part of that big extent to be defragged. 
*/ if (ret) { - *last_len += len; *defrag_end = extent_map_end(em); } else { *last_len = 0; @@ -844,6 +852,7 @@ static int cluster_pages_for_defrag(struct inode *inode, int i_done; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); if (isize == 0) return 0; @@ -861,7 +870,7 @@ again: for (i = 0; i < num_pages; i++) { struct page *page; page = find_or_create_page(inode->i_mapping, - start_index + i, GFP_NOFS); + start_index + i, mask); if (!page) break; @@ -973,18 +982,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_super_block *disk_super; struct file_ra_state *ra = NULL; unsigned long last_index; + u64 isize = i_size_read(inode); u64 features; u64 last_len = 0; u64 skip = 0; u64 defrag_end = 0; u64 newer_off = range->start; - int newer_left = 0; unsigned long i; + unsigned long ra_index = 0; int ret; int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; int extent_thresh = range->extent_thresh; - int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int cluster = max_cluster; u64 new_align = ~((u64)128 * 1024 - 1); struct page **pages = NULL; @@ -998,7 +1009,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, compress_type = range->compress_type; } - if (inode->i_size == 0) + if (isize == 0) return 0; /* @@ -1014,7 +1025,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra = &file->f_ra; } - pages = kmalloc(sizeof(struct page *) * newer_cluster, + pages = kmalloc(sizeof(struct page *) * max_cluster, GFP_NOFS); if (!pages) { ret = -ENOMEM; @@ -1023,10 +1034,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, /* find the last page to defrag */ if (range->start + range->len > range->start) { - last_index = min_t(u64, inode->i_size - 1, + last_index = min_t(u64, isize - 1, range->start + range->len - 1) >> PAGE_CACHE_SHIFT; } else { - 
last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + last_index = (isize - 1) >> PAGE_CACHE_SHIFT; } if (newer_than) { @@ -1039,16 +1050,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * the extents in the file evenly spaced */ i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; - newer_left = newer_cluster; } else goto out_ra; } else { i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index - 1; + max_to_defrag = last_index; + + /* + * make writeback starts from i, so the defrag range can be + * written sequentially. + */ + if (i < inode->i_mapping->writeback_index) + inode->i_mapping->writeback_index = i; - while (i <= last_index && defrag_count < max_to_defrag) { + while (i <= last_index && defrag_count < max_to_defrag && + (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { /* * make sure we stop running if someone unmounts * the FS @@ -1071,18 +1090,31 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = max(i + 1, next); continue; } + + if (!newer_than) { + cluster = (PAGE_CACHE_ALIGN(defrag_end) >> + PAGE_CACHE_SHIFT) - i; + cluster = min(cluster, max_cluster); + } else { + cluster = max_cluster; + } + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) BTRFS_I(inode)->force_compress = compress_type; - btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); + if (i + cluster > ra_index) { + ra_index = max(i, ra_index); + btrfs_force_ra(inode->i_mapping, ra, file, ra_index, + cluster); + ra_index += max_cluster; + } - ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); + ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) goto out_ra; defrag_count += ret; balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); - i += ret; if (newer_than) { if (newer_off == (u64)-1) @@ -1097,12 +1129,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!ret) { range->start = newer_off; i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 
- newer_left = newer_cluster; } else { break; } } else { - i++; + if (ret > 0) { + i += ret; + last_len += ret << PAGE_CACHE_SHIFT; + } else { + i++; + last_len = 0; + } } } @@ -1128,16 +1165,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, mutex_unlock(&inode->i_mutex); } - disk_super = &root->fs_info->super_copy; + disk_super = root->fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (range->compress_type == BTRFS_COMPRESS_LZO) { features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; btrfs_set_super_incompat_flags(disk_super, features); } - if (!file) - kfree(ra); - return defrag_count; + ret = defrag_count; out_ra: if (!file) @@ -2579,7 +2614,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) return PTR_ERR(trans); } - dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, dir_id, "default", 7, 1); if (IS_ERR_OR_NULL(di)) { @@ -2595,7 +2630,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - disk_super = &root->fs_info->super_copy; + disk_super = root->fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; @@ -2862,7 +2897,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) int i; unsigned long rel_ptr; int size; - struct btrfs_ioctl_ino_path_args *ipa; + struct btrfs_ioctl_ino_path_args *ipa = NULL; struct inode_fs_paths *ipath = NULL; struct btrfs_path *path; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fb2605d998e..f38e452486b 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -158,8 +158,7 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) void 
btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 type; - u32 nr = btrfs_header_nritems(l); + u32 type, nr; struct btrfs_item *item; struct btrfs_root_item *ri; struct btrfs_dir_item *di; @@ -172,6 +171,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_key key; struct btrfs_key found_key; + if (!l) + return; + + nr = btrfs_header_nritems(l); + printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c new file mode 100644 index 00000000000..cd857119ba8 --- /dev/null +++ b/fs/btrfs/reada.c @@ -0,0 +1,949 @@ +/* + * Copyright (C) 2011 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/sched.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/blkdev.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include "ctree.h" +#include "volumes.h" +#include "disk-io.h" +#include "transaction.h" + +#undef DEBUG + +/* + * This is the implementation for the generic read ahead framework. + * + * To trigger a readahead, btrfs_reada_add must be called. It will start + * a read ahead for the given range [start, end) on tree root. 
The returned + * handle can either be used to wait on the readahead to finish + * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach). + * + * The read ahead works as follows: + * On btrfs_reada_add, the root of the tree is inserted into a radix_tree. + * reada_start_machine will then search for extents to prefetch and trigger + * some reads. When a read finishes for a node, all contained node/leaf + * pointers that lie in the given range will also be enqueued. The reads will + * be triggered in sequential order, thus giving a big win over a naive + * enumeration. It will also make use of multi-device layouts. Each disk + * will have its own read pointer and all disks will be utilized in parallel. + * Also, no two disks will read both sides of a mirror simultaneously, as this + * would waste seeking capacity. Instead both disks will read different parts + * of the filesystem. + * Any number of readaheads can be started in parallel. The read order will be + * determined globally, i.e. 2 parallel readaheads will normally finish faster + * than the 2 started one after another. 
+ */ + +#define MAX_MIRRORS 2 +#define MAX_IN_FLIGHT 6 + +struct reada_extctl { + struct list_head list; + struct reada_control *rc; + u64 generation; +}; + +struct reada_extent { + u64 logical; + struct btrfs_key top; + u32 blocksize; + int err; + struct list_head extctl; + struct kref refcnt; + spinlock_t lock; + struct reada_zone *zones[MAX_MIRRORS]; + int nzones; + struct btrfs_device *scheduled_for; +}; + +struct reada_zone { + u64 start; + u64 end; + u64 elems; + struct list_head list; + spinlock_t lock; + int locked; + struct btrfs_device *device; + struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ + int ndevs; + struct kref refcnt; +}; + +struct reada_machine_work { + struct btrfs_work work; + struct btrfs_fs_info *fs_info; +}; + +static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *); +static void reada_control_release(struct kref *kref); +static void reada_zone_release(struct kref *kref); +static void reada_start_machine(struct btrfs_fs_info *fs_info); +static void __reada_start_machine(struct btrfs_fs_info *fs_info); + +static int reada_add_block(struct reada_control *rc, u64 logical, + struct btrfs_key *top, int level, u64 generation); + +/* recurses */ +/* in case of err, eb might be NULL */ +static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, + u64 start, int err) +{ + int level = 0; + int nritems; + int i; + u64 bytenr; + u64 generation; + struct reada_extent *re; + struct btrfs_fs_info *fs_info = root->fs_info; + struct list_head list; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct btrfs_device *for_dev; + + if (eb) + level = btrfs_header_level(eb); + + /* find extent */ + spin_lock(&fs_info->reada_lock); + re = radix_tree_lookup(&fs_info->reada_tree, index); + if (re) + kref_get(&re->refcnt); + spin_unlock(&fs_info->reada_lock); + + if (!re) + return -1; + + spin_lock(&re->lock); + /* + * just take the full list from the extent. 
afterwards we + * don't need the lock anymore + */ + list_replace_init(&re->extctl, &list); + for_dev = re->scheduled_for; + re->scheduled_for = NULL; + spin_unlock(&re->lock); + + if (err == 0) { + nritems = level ? btrfs_header_nritems(eb) : 0; + generation = btrfs_header_generation(eb); + /* + * FIXME: currently we just set nritems to 0 if this is a leaf, + * effectively ignoring the content. In a next step we could + * trigger more readahead depending from the content, e.g. + * fetch the checksums for the extents in the leaf. + */ + } else { + /* + * this is the error case, the extent buffer has not been + * read correctly. We won't access anything from it and + * just cleanup our data structures. Effectively this will + * cut the branch below this node from read ahead. + */ + nritems = 0; + generation = 0; + } + + for (i = 0; i < nritems; i++) { + struct reada_extctl *rec; + u64 n_gen; + struct btrfs_key key; + struct btrfs_key next_key; + + btrfs_node_key_to_cpu(eb, &key, i); + if (i + 1 < nritems) + btrfs_node_key_to_cpu(eb, &next_key, i + 1); + else + next_key = re->top; + bytenr = btrfs_node_blockptr(eb, i); + n_gen = btrfs_node_ptr_generation(eb, i); + + list_for_each_entry(rec, &list, list) { + struct reada_control *rc = rec->rc; + + /* + * if the generation doesn't match, just ignore this + * extctl. This will probably cut off a branch from + * prefetch. Alternatively one could start a new (sub-) + * prefetch for this branch, starting again from root. 
+ * FIXME: move the generation check out of this loop + */ +#ifdef DEBUG + if (rec->generation != generation) { + printk(KERN_DEBUG "generation mismatch for " + "(%llu,%d,%llu) %llu != %llu\n", + key.objectid, key.type, key.offset, + rec->generation, generation); + } +#endif + if (rec->generation == generation && + btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && + btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) + reada_add_block(rc, bytenr, &next_key, + level - 1, n_gen); + } + } + /* + * free extctl records + */ + while (!list_empty(&list)) { + struct reada_control *rc; + struct reada_extctl *rec; + + rec = list_first_entry(&list, struct reada_extctl, list); + list_del(&rec->list); + rc = rec->rc; + kfree(rec); + + kref_get(&rc->refcnt); + if (atomic_dec_and_test(&rc->elems)) { + kref_put(&rc->refcnt, reada_control_release); + wake_up(&rc->wait); + } + kref_put(&rc->refcnt, reada_control_release); + + reada_extent_put(fs_info, re); /* one ref for each entry */ + } + reada_extent_put(fs_info, re); /* our ref */ + if (for_dev) + atomic_dec(&for_dev->reada_in_flight); + + return 0; +} + +/* + * start is passed separately in case eb in NULL, which may be the case with + * failed I/O + */ +int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, + u64 start, int err) +{ + int ret; + + ret = __readahead_hook(root, eb, start, err); + + reada_start_machine(root->fs_info); + + return ret; +} + +static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, + struct btrfs_device *dev, u64 logical, + struct btrfs_bio *multi) +{ + int ret; + int looped = 0; + struct reada_zone *zone; + struct btrfs_block_group_cache *cache = NULL; + u64 start; + u64 end; + int i; + +again: + zone = NULL; + spin_lock(&fs_info->reada_lock); + ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, + logical >> PAGE_CACHE_SHIFT, 1); + if (ret == 1) + kref_get(&zone->refcnt); + spin_unlock(&fs_info->reada_lock); + + if (ret == 1) { + if (logical >= 
zone->start && logical < zone->end) + return zone; + spin_lock(&fs_info->reada_lock); + kref_put(&zone->refcnt, reada_zone_release); + spin_unlock(&fs_info->reada_lock); + } + + if (looped) + return NULL; + + cache = btrfs_lookup_block_group(fs_info, logical); + if (!cache) + return NULL; + + start = cache->key.objectid; + end = start + cache->key.offset - 1; + btrfs_put_block_group(cache); + + zone = kzalloc(sizeof(*zone), GFP_NOFS); + if (!zone) + return NULL; + + zone->start = start; + zone->end = end; + INIT_LIST_HEAD(&zone->list); + spin_lock_init(&zone->lock); + zone->locked = 0; + kref_init(&zone->refcnt); + zone->elems = 0; + zone->device = dev; /* our device always sits at index 0 */ + for (i = 0; i < multi->num_stripes; ++i) { + /* bounds have already been checked */ + zone->devs[i] = multi->stripes[i].dev; + } + zone->ndevs = multi->num_stripes; + + spin_lock(&fs_info->reada_lock); + ret = radix_tree_insert(&dev->reada_zones, + (unsigned long)zone->end >> PAGE_CACHE_SHIFT, + zone); + spin_unlock(&fs_info->reada_lock); + + if (ret) { + kfree(zone); + looped = 1; + goto again; + } + + return zone; +} + +static struct reada_extent *reada_find_extent(struct btrfs_root *root, + u64 logical, + struct btrfs_key *top, int level) +{ + int ret; + int looped = 0; + struct reada_extent *re = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct btrfs_bio *multi = NULL; + struct btrfs_device *dev; + u32 blocksize; + u64 length; + int nzones = 0; + int i; + unsigned long index = logical >> PAGE_CACHE_SHIFT; + +again: + spin_lock(&fs_info->reada_lock); + re = radix_tree_lookup(&fs_info->reada_tree, index); + if (re) + kref_get(&re->refcnt); + spin_unlock(&fs_info->reada_lock); + + if (re || looped) + return re; + + re = kzalloc(sizeof(*re), GFP_NOFS); + if (!re) + return NULL; + + blocksize = btrfs_level_size(root, level); + re->logical = logical; + re->blocksize = blocksize; + re->top = *top; + 
INIT_LIST_HEAD(&re->extctl); + spin_lock_init(&re->lock); + kref_init(&re->refcnt); + + /* + * map block + */ + length = blocksize; + ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &multi, 0); + if (ret || !multi || length < blocksize) + goto error; + + if (multi->num_stripes > MAX_MIRRORS) { + printk(KERN_ERR "btrfs readahead: more than %d copies not " + "supported", MAX_MIRRORS); + goto error; + } + + for (nzones = 0; nzones < multi->num_stripes; ++nzones) { + struct reada_zone *zone; + + dev = multi->stripes[nzones].dev; + zone = reada_find_zone(fs_info, dev, logical, multi); + if (!zone) + break; + + re->zones[nzones] = zone; + spin_lock(&zone->lock); + if (!zone->elems) + kref_get(&zone->refcnt); + ++zone->elems; + spin_unlock(&zone->lock); + spin_lock(&fs_info->reada_lock); + kref_put(&zone->refcnt, reada_zone_release); + spin_unlock(&fs_info->reada_lock); + } + re->nzones = nzones; + if (nzones == 0) { + /* not a single zone found, error and out */ + goto error; + } + + /* insert extent in reada_tree + all per-device trees, all or nothing */ + spin_lock(&fs_info->reada_lock); + ret = radix_tree_insert(&fs_info->reada_tree, index, re); + if (ret) { + spin_unlock(&fs_info->reada_lock); + if (ret != -ENOMEM) { + /* someone inserted the extent in the meantime */ + looped = 1; + } + goto error; + } + for (i = 0; i < nzones; ++i) { + dev = multi->stripes[i].dev; + ret = radix_tree_insert(&dev->reada_extents, index, re); + if (ret) { + while (--i >= 0) { + dev = multi->stripes[i].dev; + BUG_ON(dev == NULL); + radix_tree_delete(&dev->reada_extents, index); + } + BUG_ON(fs_info == NULL); + radix_tree_delete(&fs_info->reada_tree, index); + spin_unlock(&fs_info->reada_lock); + goto error; + } + } + spin_unlock(&fs_info->reada_lock); + + return re; + +error: + while (nzones) { + struct reada_zone *zone; + + --nzones; + zone = re->zones[nzones]; + kref_get(&zone->refcnt); + spin_lock(&zone->lock); + --zone->elems; + if (zone->elems == 0) { + /* + * no 
fs_info->reada_lock needed, as this can't be + * the last ref + */ + kref_put(&zone->refcnt, reada_zone_release); + } + spin_unlock(&zone->lock); + + spin_lock(&fs_info->reada_lock); + kref_put(&zone->refcnt, reada_zone_release); + spin_unlock(&fs_info->reada_lock); + } + kfree(re); + if (looped) + goto again; + return NULL; +} + +static void reada_kref_dummy(struct kref *kr) +{ +} + +static void reada_extent_put(struct btrfs_fs_info *fs_info, + struct reada_extent *re) +{ + int i; + unsigned long index = re->logical >> PAGE_CACHE_SHIFT; + + spin_lock(&fs_info->reada_lock); + if (!kref_put(&re->refcnt, reada_kref_dummy)) { + spin_unlock(&fs_info->reada_lock); + return; + } + + radix_tree_delete(&fs_info->reada_tree, index); + for (i = 0; i < re->nzones; ++i) { + struct reada_zone *zone = re->zones[i]; + + radix_tree_delete(&zone->device->reada_extents, index); + } + + spin_unlock(&fs_info->reada_lock); + + for (i = 0; i < re->nzones; ++i) { + struct reada_zone *zone = re->zones[i]; + + kref_get(&zone->refcnt); + spin_lock(&zone->lock); + --zone->elems; + if (zone->elems == 0) { + /* no fs_info->reada_lock needed, as this can't be + * the last ref */ + kref_put(&zone->refcnt, reada_zone_release); + } + spin_unlock(&zone->lock); + + spin_lock(&fs_info->reada_lock); + kref_put(&zone->refcnt, reada_zone_release); + spin_unlock(&fs_info->reada_lock); + } + if (re->scheduled_for) + atomic_dec(&re->scheduled_for->reada_in_flight); + + kfree(re); +} + +static void reada_zone_release(struct kref *kref) +{ + struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); + + radix_tree_delete(&zone->device->reada_zones, + zone->end >> PAGE_CACHE_SHIFT); + + kfree(zone); +} + +static void reada_control_release(struct kref *kref) +{ + struct reada_control *rc = container_of(kref, struct reada_control, + refcnt); + + kfree(rc); +} + +static int reada_add_block(struct reada_control *rc, u64 logical, + struct btrfs_key *top, int level, u64 generation) +{ + struct 
btrfs_root *root = rc->root; + struct reada_extent *re; + struct reada_extctl *rec; + + re = reada_find_extent(root, logical, top, level); /* takes one ref */ + if (!re) + return -1; + + rec = kzalloc(sizeof(*rec), GFP_NOFS); + if (!rec) { + reada_extent_put(root->fs_info, re); + return -1; + } + + rec->rc = rc; + rec->generation = generation; + atomic_inc(&rc->elems); + + spin_lock(&re->lock); + list_add_tail(&rec->list, &re->extctl); + spin_unlock(&re->lock); + + /* leave the ref on the extent */ + + return 0; +} + +/* + * called with fs_info->reada_lock held + */ +static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) +{ + int i; + unsigned long index = zone->end >> PAGE_CACHE_SHIFT; + + for (i = 0; i < zone->ndevs; ++i) { + struct reada_zone *peer; + peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index); + if (peer && peer->device != zone->device) + peer->locked = lock; + } +} + +/* + * called with fs_info->reada_lock held + */ +static int reada_pick_zone(struct btrfs_device *dev) +{ + struct reada_zone *top_zone = NULL; + struct reada_zone *top_locked_zone = NULL; + u64 top_elems = 0; + u64 top_locked_elems = 0; + unsigned long index = 0; + int ret; + + if (dev->reada_curr_zone) { + reada_peer_zones_set_lock(dev->reada_curr_zone, 0); + kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release); + dev->reada_curr_zone = NULL; + } + /* pick the zone with the most elements */ + while (1) { + struct reada_zone *zone; + + ret = radix_tree_gang_lookup(&dev->reada_zones, + (void **)&zone, index, 1); + if (ret == 0) + break; + index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + if (zone->locked) { + if (zone->elems > top_locked_elems) { + top_locked_elems = zone->elems; + top_locked_zone = zone; + } + } else { + if (zone->elems > top_elems) { + top_elems = zone->elems; + top_zone = zone; + } + } + } + if (top_zone) + dev->reada_curr_zone = top_zone; + else if (top_locked_zone) + dev->reada_curr_zone = top_locked_zone; + else + return 0; + + 
dev->reada_next = dev->reada_curr_zone->start; + kref_get(&dev->reada_curr_zone->refcnt); + reada_peer_zones_set_lock(dev->reada_curr_zone, 1); + + return 1; +} + +static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, + struct btrfs_device *dev) +{ + struct reada_extent *re = NULL; + int mirror_num = 0; + struct extent_buffer *eb = NULL; + u64 logical; + u32 blocksize; + int ret; + int i; + int need_kick = 0; + + spin_lock(&fs_info->reada_lock); + if (dev->reada_curr_zone == NULL) { + ret = reada_pick_zone(dev); + if (!ret) { + spin_unlock(&fs_info->reada_lock); + return 0; + } + } + /* + * FIXME currently we issue the reads one extent at a time. If we have + * a contiguous block of extents, we could also coagulate them or use + * plugging to speed things up + */ + ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, + dev->reada_next >> PAGE_CACHE_SHIFT, 1); + if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { + ret = reada_pick_zone(dev); + if (!ret) { + spin_unlock(&fs_info->reada_lock); + return 0; + } + re = NULL; + ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, + dev->reada_next >> PAGE_CACHE_SHIFT, 1); + } + if (ret == 0) { + spin_unlock(&fs_info->reada_lock); + return 0; + } + dev->reada_next = re->logical + re->blocksize; + kref_get(&re->refcnt); + + spin_unlock(&fs_info->reada_lock); + + /* + * find mirror num + */ + for (i = 0; i < re->nzones; ++i) { + if (re->zones[i]->device == dev) { + mirror_num = i + 1; + break; + } + } + logical = re->logical; + blocksize = re->blocksize; + + spin_lock(&re->lock); + if (re->scheduled_for == NULL) { + re->scheduled_for = dev; + need_kick = 1; + } + spin_unlock(&re->lock); + + reada_extent_put(fs_info, re); + + if (!need_kick) + return 0; + + atomic_inc(&dev->reada_in_flight); + ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, + mirror_num, &eb); + if (ret) + __readahead_hook(fs_info->extent_root, NULL, logical, ret); + else if (eb) + 
__readahead_hook(fs_info->extent_root, eb, eb->start, ret); + + if (eb) + free_extent_buffer(eb); + + return 1; + +} + +static void reada_start_machine_worker(struct btrfs_work *work) +{ + struct reada_machine_work *rmw; + struct btrfs_fs_info *fs_info; + + rmw = container_of(work, struct reada_machine_work, work); + fs_info = rmw->fs_info; + + kfree(rmw); + + __reada_start_machine(fs_info); +} + +static void __reada_start_machine(struct btrfs_fs_info *fs_info) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + u64 enqueued; + u64 total = 0; + int i; + + do { + enqueued = 0; + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (atomic_read(&device->reada_in_flight) < + MAX_IN_FLIGHT) + enqueued += reada_start_machine_dev(fs_info, + device); + } + total += enqueued; + } while (enqueued && total < 10000); + + if (enqueued == 0) + return; + + /* + * If everything is already in the cache, this is effectively single + * threaded. To a) not hold the caller for too long and b) to utilize + * more cores, we broke the loop above after 10000 iterations and now + * enqueue to workers to finish it. This will distribute the load to + * the cores. 
+ */ + for (i = 0; i < 2; ++i) + reada_start_machine(fs_info); +} + +static void reada_start_machine(struct btrfs_fs_info *fs_info) +{ + struct reada_machine_work *rmw; + + rmw = kzalloc(sizeof(*rmw), GFP_NOFS); + if (!rmw) { + /* FIXME we cannot handle this properly right now */ + BUG(); + } + rmw->work.func = reada_start_machine_worker; + rmw->fs_info = fs_info; + + btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); +} + +#ifdef DEBUG +static void dump_devs(struct btrfs_fs_info *fs_info, int all) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + unsigned long index; + int ret; + int i; + int j; + int cnt; + + spin_lock(&fs_info->reada_lock); + list_for_each_entry(device, &fs_devices->devices, dev_list) { + printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid, + atomic_read(&device->reada_in_flight)); + index = 0; + while (1) { + struct reada_zone *zone; + ret = radix_tree_gang_lookup(&device->reada_zones, + (void **)&zone, index, 1); + if (ret == 0) + break; + printk(KERN_DEBUG " zone %llu-%llu elems %llu locked " + "%d devs", zone->start, zone->end, zone->elems, + zone->locked); + for (j = 0; j < zone->ndevs; ++j) { + printk(KERN_CONT " %lld", + zone->devs[j]->devid); + } + if (device->reada_curr_zone == zone) + printk(KERN_CONT " curr off %llu", + device->reada_next - zone->start); + printk(KERN_CONT "\n"); + index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + } + cnt = 0; + index = 0; + while (all) { + struct reada_extent *re = NULL; + + ret = radix_tree_gang_lookup(&device->reada_extents, + (void **)&re, index, 1); + if (ret == 0) + break; + printk(KERN_DEBUG + " re: logical %llu size %u empty %d for %lld", + re->logical, re->blocksize, + list_empty(&re->extctl), re->scheduled_for ? 
+ re->scheduled_for->devid : -1); + + for (i = 0; i < re->nzones; ++i) { + printk(KERN_CONT " zone %llu-%llu devs", + re->zones[i]->start, + re->zones[i]->end); + for (j = 0; j < re->zones[i]->ndevs; ++j) { + printk(KERN_CONT " %lld", + re->zones[i]->devs[j]->devid); + } + } + printk(KERN_CONT "\n"); + index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + if (++cnt > 15) + break; + } + } + + index = 0; + cnt = 0; + while (all) { + struct reada_extent *re = NULL; + + ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re, + index, 1); + if (ret == 0) + break; + if (!re->scheduled_for) { + index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + continue; + } + printk(KERN_DEBUG + "re: logical %llu size %u list empty %d for %lld", + re->logical, re->blocksize, list_empty(&re->extctl), + re->scheduled_for ? re->scheduled_for->devid : -1); + for (i = 0; i < re->nzones; ++i) { + printk(KERN_CONT " zone %llu-%llu devs", + re->zones[i]->start, + re->zones[i]->end); + for (i = 0; i < re->nzones; ++i) { + printk(KERN_CONT " zone %llu-%llu devs", + re->zones[i]->start, + re->zones[i]->end); + for (j = 0; j < re->zones[i]->ndevs; ++j) { + printk(KERN_CONT " %lld", + re->zones[i]->devs[j]->devid); + } + } + } + printk(KERN_CONT "\n"); + index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + } + spin_unlock(&fs_info->reada_lock); +} +#endif + +/* + * interface + */ +struct reada_control *btrfs_reada_add(struct btrfs_root *root, + struct btrfs_key *key_start, struct btrfs_key *key_end) +{ + struct reada_control *rc; + u64 start; + u64 generation; + int level; + struct extent_buffer *node; + static struct btrfs_key max_key = { + .objectid = (u64)-1, + .type = (u8)-1, + .offset = (u64)-1 + }; + + rc = kzalloc(sizeof(*rc), GFP_NOFS); + if (!rc) + return ERR_PTR(-ENOMEM); + + rc->root = root; + rc->key_start = *key_start; + rc->key_end = *key_end; + atomic_set(&rc->elems, 0); + init_waitqueue_head(&rc->wait); + kref_init(&rc->refcnt); + kref_get(&rc->refcnt); /* one ref for having elements */ 
+ + node = btrfs_root_node(root); + start = node->start; + level = btrfs_header_level(node); + generation = btrfs_header_generation(node); + free_extent_buffer(node); + + reada_add_block(rc, start, &max_key, level, generation); + + reada_start_machine(root->fs_info); + + return rc; +} + +#ifdef DEBUG +int btrfs_reada_wait(void *handle) +{ + struct reada_control *rc = handle; + + while (atomic_read(&rc->elems)) { + wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, + 5 * HZ); + dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); + } + + dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); + + kref_put(&rc->refcnt, reada_control_release); + + return 0; +} +#else +int btrfs_reada_wait(void *handle) +{ + struct reada_control *rc = handle; + + while (atomic_read(&rc->elems)) { + wait_event(rc->wait, atomic_read(&rc->elems) == 0); + } + + kref_put(&rc->refcnt, reada_control_release); + + return 0; +} +#endif + +void btrfs_reada_detach(void *handle) +{ + struct reada_control *rc = handle; + + kref_put(&rc->refcnt, reada_control_release); +} diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 59bb1764273..24d654ce7a0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2041,8 +2041,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, BUG_ON(IS_ERR(trans)); trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, - min_reserved, 0); + ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); if (ret) { BUG_ON(ret != -EAGAIN); ret = btrfs_commit_transaction(trans, root); @@ -2152,8 +2151,7 @@ int prepare_to_merge(struct reloc_control *rc, int err) again: if (!err) { num_bytes = rc->merging_rsv_size; - ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, - num_bytes); + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); if (ret) err = ret; } @@ -2427,7 +2425,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, num_bytes = calcu_metadata_size(rc, 
node, 1) * 2; trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); if (ret) { if (ret == -EAGAIN) rc->commit_transaction = 1; @@ -2922,6 +2920,7 @@ static int relocate_file_extent_cluster(struct inode *inode, unsigned long last_index; struct page *page; struct file_ra_state *ra; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int nr = 0; int ret = 0; @@ -2956,7 +2955,7 @@ static int relocate_file_extent_cluster(struct inode *inode, ra, NULL, index, last_index + 1 - index); page = find_or_create_page(inode->i_mapping, index, - GFP_NOFS); + mask); if (!page) { btrfs_delalloc_release_metadata(inode, PAGE_CACHE_SIZE); @@ -3323,8 +3322,11 @@ static int find_data_references(struct reloc_control *rc, } key.objectid = ref_objectid; - key.offset = ref_offset; key.type = BTRFS_EXTENT_DATA_KEY; + if (ref_offset > ((u64)-1 << 32)) + key.offset = 0; + else + key.offset = ref_offset; path->search_commit_root = 1; path->skip_locking = 1; @@ -3645,14 +3647,11 @@ int prepare_to_relocate(struct reloc_control *rc) * btrfs_init_reloc_root will use them when there * is no reservation in transaction handle. 
*/ - ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, + ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, rc->extent_root->nodesize * 256); if (ret) return ret; - rc->block_rsv->refill_used = 1; - btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); - memset(&rc->cluster, 0, sizeof(rc->cluster)); rc->search_start = rc->block_group->key.objectid; rc->extents_found = 0; @@ -3777,8 +3776,7 @@ restart: } } - ret = btrfs_block_rsv_check(trans, rc->extent_root, - rc->block_rsv, 0, 5); + ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); if (ret < 0) { if (ret != -EAGAIN) { err = ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index eba42e5fd5f..94cd3a19e9c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -33,15 +33,12 @@ * any can be found. * * Future enhancements: - * - To enhance the performance, better read-ahead strategies for the - * extent-tree can be employed. * - In case an unrepairable extent is encountered, track which files are * affected and report them * - In case of a read error on files with nodatasum, map the file and read * the extent to trigger a writeback of the good copy * - track and record media errors, throw out bad devices * - add a mode to also read unallocated space - * - make the prefetch cancellable */ struct scrub_bio; @@ -209,7 +206,7 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) atomic_set(&sdev->in_flight, 0); atomic_set(&sdev->fixup_cnt, 0); atomic_set(&sdev->cancel_req, 0); - sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); + sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); INIT_LIST_HEAD(&sdev->csum_list); spin_lock_init(&sdev->list_lock); @@ -1130,13 +1127,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, int slot; int i; u64 nstripes; - int start_stripe; struct extent_buffer *l; struct btrfs_key key; u64 physical; u64 logical; u64 generation; int mirror_num; + struct reada_control *reada1; + struct 
reada_control *reada2; + struct btrfs_key key_start; + struct btrfs_key key_end; u64 increment = map->stripe_len; u64 offset; @@ -1168,81 +1168,67 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, if (!path) return -ENOMEM; - path->reada = 2; path->search_commit_root = 1; path->skip_locking = 1; /* - * find all extents for each stripe and just read them to get - * them into the page cache - * FIXME: we can do better. build a more intelligent prefetching + * trigger the readahead for extent tree csum tree and wait for + * completion. During readahead, the scrub is officially paused + * to not hold off transaction commits */ logical = base + offset; - physical = map->stripes[num].physical; - ret = 0; - for (i = 0; i < nstripes; ++i) { - key.objectid = logical; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)0; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out_noplug; - - /* - * we might miss half an extent here, but that doesn't matter, - * as it's only the prefetch - */ - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto out_noplug; - break; - } - btrfs_item_key_to_cpu(l, &key, slot); + wait_event(sdev->list_wait, + atomic_read(&sdev->in_flight) == 0); + atomic_inc(&fs_info->scrubs_paused); + wake_up(&fs_info->scrub_pause_wait); - if (key.objectid >= logical + map->stripe_len) - break; + /* FIXME it might be better to start readahead at commit root */ + key_start.objectid = logical; + key_start.type = BTRFS_EXTENT_ITEM_KEY; + key_start.offset = (u64)0; + key_end.objectid = base + offset + nstripes * increment; + key_end.type = BTRFS_EXTENT_ITEM_KEY; + key_end.offset = (u64)0; + reada1 = btrfs_reada_add(root, &key_start, &key_end); + + key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key_start.type = BTRFS_EXTENT_CSUM_KEY; + key_start.offset = logical; + 
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key_end.type = BTRFS_EXTENT_CSUM_KEY; + key_end.offset = base + offset + nstripes * increment; + reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); + + if (!IS_ERR(reada1)) + btrfs_reada_wait(reada1); + if (!IS_ERR(reada2)) + btrfs_reada_wait(reada2); - path->slots[0]++; - } - btrfs_release_path(path); - logical += increment; - physical += map->stripe_len; - cond_resched(); + mutex_lock(&fs_info->scrub_lock); + while (atomic_read(&fs_info->scrub_pause_req)) { + mutex_unlock(&fs_info->scrub_lock); + wait_event(fs_info->scrub_pause_wait, + atomic_read(&fs_info->scrub_pause_req) == 0); + mutex_lock(&fs_info->scrub_lock); } + atomic_dec(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); /* * collect all data csums for the stripe to avoid seeking during * the scrub. This might currently (crc32) end up to be about 1MB */ - start_stripe = 0; blk_start_plug(&plug); -again: - logical = base + offset + start_stripe * increment; - for (i = start_stripe; i < nstripes; ++i) { - ret = btrfs_lookup_csums_range(csum_root, logical, - logical + map->stripe_len - 1, - &sdev->csum_list, 1); - if (ret) - goto out; - logical += increment; - cond_resched(); - } /* * now find all extents for each stripe and scrub them */ - logical = base + offset + start_stripe * increment; - physical = map->stripes[num].physical + start_stripe * map->stripe_len; + logical = base + offset; + physical = map->stripes[num].physical; ret = 0; - for (i = start_stripe; i < nstripes; ++i) { + for (i = 0; i < nstripes; ++i) { /* * canceled? 
*/ @@ -1271,11 +1257,14 @@ again: atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); wake_up(&fs_info->scrub_pause_wait); - scrub_free_csums(sdev); - start_stripe = i; - goto again; } + ret = btrfs_lookup_csums_range(csum_root, logical, + logical + map->stripe_len - 1, + &sdev->csum_list, 1); + if (ret) + goto out; + key.objectid = logical; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)0; @@ -1371,7 +1360,6 @@ next: out: blk_finish_plug(&plug); -out_noplug: btrfs_free_path(path); return ret < 0 ? ret : 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 15634d4648d..57080dffdfc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -40,6 +40,7 @@ #include <linux/magic.h> #include <linux/slab.h> #include <linux/cleancache.h> +#include <linux/mnt_namespace.h> #include "compat.h" #include "delayed-inode.h" #include "ctree.h" @@ -58,6 +59,7 @@ #include <trace/events/btrfs.h> static const struct super_operations btrfs_super_ops; +static struct file_system_type btrfs_fs_type; static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, char nbuf[16]) @@ -162,7 +164,7 @@ enum { Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, - Opt_inode_cache, Opt_err, + Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, }; static match_table_t tokens = { @@ -195,6 +197,8 @@ static match_table_t tokens = { {Opt_subvolrootid, "subvolrootid=%d"}, {Opt_defrag, "autodefrag"}, {Opt_inode_cache, "inode_cache"}, + {Opt_no_space_cache, "no_space_cache"}, + {Opt_recovery, "recovery"}, {Opt_err, NULL}, }; @@ -206,14 +210,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) { struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; - char *p, *num, *orig; + char *p, *num, *orig = NULL; + u64 cache_gen; int intarg; int ret = 0; char *compress_type; bool compress_force = false; + 
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); + if (cache_gen) + btrfs_set_opt(info->mount_opt, SPACE_CACHE); + if (!options) - return 0; + goto out; /* * strsep changes the string, duplicate it because parse_options @@ -360,9 +369,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, DISCARD); break; case Opt_space_cache: - printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_no_space_cache: + printk(KERN_INFO "btrfs: disabling disk space caching\n"); + btrfs_clear_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_inode_cache: printk(KERN_INFO "btrfs: enabling inode map caching\n"); btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); @@ -381,6 +393,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling auto defrag"); btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); break; + case Opt_recovery: + printk(KERN_INFO "btrfs: enabling auto recovery"); + btrfs_set_opt(info->mount_opt, RECOVERY); + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -391,6 +407,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) } } out: + if (!ret && btrfs_test_opt(root, SPACE_CACHE)) + printk(KERN_INFO "btrfs: disk space caching is enabled\n"); kfree(orig); return ret; } @@ -406,12 +424,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; - char *opts, *orig, *p; + char *device_name, *opts, *orig, *p; int error = 0; int intarg; if (!options) - goto out; + return 0; /* * strsep changes the string, duplicate it because parse_options @@ -457,29 +475,24 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } break; case Opt_device: - error = btrfs_scan_one_device(match_strdup(&args[0]), + device_name = 
match_strdup(&args[0]); + if (!device_name) { + error = -ENOMEM; + goto out; + } + error = btrfs_scan_one_device(device_name, flags, holder, fs_devices); + kfree(device_name); if (error) - goto out_free_opts; + goto out; break; default: break; } } - out_free_opts: +out: kfree(orig); - out: - /* - * If no subvolume name is specified we use the default one. Allocate - * a copy of the string "." here so that code later in the - * mount path doesn't care if it's the default volume or another one. - */ - if (!*subvol_name) { - *subvol_name = kstrdup(".", GFP_KERNEL); - if (!*subvol_name) - return -ENOMEM; - } return error; } @@ -492,7 +505,6 @@ static struct dentry *get_default_root(struct super_block *sb, struct btrfs_path *path; struct btrfs_key location; struct inode *inode; - struct dentry *dentry; u64 dir_id; int new = 0; @@ -517,7 +529,7 @@ static struct dentry *get_default_root(struct super_block *sb, * will mount by default if we haven't been given a specific subvolume * to mount. */ - dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); if (IS_ERR(di)) { btrfs_free_path(path); @@ -566,29 +578,7 @@ setup_root: return dget(sb->s_root); } - if (new) { - const struct qstr name = { .name = "/", .len = 1 }; - - /* - * New inode, we need to make the dentry a sibling of s_root so - * everything gets cleaned up properly on unmount. - */ - dentry = d_alloc(sb->s_root, &name); - if (!dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - d_splice_alias(inode, dentry); - } else { - /* - * We found the inode in cache, just find a dentry for it and - * put the reference to the inode we just got. 
- */ - dentry = d_find_alias(inode); - iput(inode); - } - - return dentry; + return d_obtain_alias(inode); } static int btrfs_fill_super(struct super_block *sb, @@ -719,6 +709,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noacl"); if (btrfs_test_opt(root, SPACE_CACHE)) seq_puts(seq, ",space_cache"); + else + seq_puts(seq, ",no_space_cache"); if (btrfs_test_opt(root, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) @@ -753,6 +745,137 @@ static int btrfs_set_super(struct super_block *s, void *data) return set_anon_super(s, data); } +/* + * subvolumes are identified by ino 256 + */ +static inline int is_subvolume_inode(struct inode *inode) +{ + if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + return 1; + return 0; +} + +/* + * This will strip out the subvol=%s argument for an argument string and add + * subvolid=0 to make sure we get the actual tree root for path walking to the + * subvol we want. + */ +static char *setup_root_args(char *args) +{ + unsigned copied = 0; + unsigned len = strlen(args) + 2; + char *pos; + char *ret; + + /* + * We need the same args as before, but minus + * + * subvol=a + * + * and add + * + * subvolid=0 + * + * which is a difference of 2 characters, so we allocate strlen(args) + + * 2 characters. + */ + ret = kzalloc(len * sizeof(char), GFP_NOFS); + if (!ret) + return NULL; + pos = strstr(args, "subvol="); + + /* This shouldn't happen, but just in case.. */ + if (!pos) { + kfree(ret); + return NULL; + } + + /* + * The subvol=<> arg is not at the front of the string, copy everybody + * up to that into ret. + */ + if (pos != args) { + *pos = '\0'; + strcpy(ret, args); + copied += strlen(args); + pos++; + } + + strncpy(ret + copied, "subvolid=0", len - copied); + + /* Length of subvolid=0 */ + copied += 10; + + /* + * If there is no , after the subvol= option then we know there's no + * other options and we can just return. 
+ */ + pos = strchr(pos, ','); + if (!pos) + return ret; + + /* Copy the rest of the arguments into our buffer */ + strncpy(ret + copied, pos, len - copied); + copied += strlen(pos); + + return ret; +} + +static struct dentry *mount_subvol(const char *subvol_name, int flags, + const char *device_name, char *data) +{ + struct super_block *s; + struct dentry *root; + struct vfsmount *mnt; + struct mnt_namespace *ns_private; + char *newargs; + struct path path; + int error; + + newargs = setup_root_args(data); + if (!newargs) + return ERR_PTR(-ENOMEM); + mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, + newargs); + kfree(newargs); + if (IS_ERR(mnt)) + return ERR_CAST(mnt); + + ns_private = create_mnt_ns(mnt); + if (IS_ERR(ns_private)) { + mntput(mnt); + return ERR_CAST(ns_private); + } + + /* + * This will trigger the automount of the subvol so we can just + * drop the mnt we have here and return the dentry that we + * found. + */ + error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name, + LOOKUP_FOLLOW, &path); + put_mnt_ns(ns_private); + if (error) + return ERR_PTR(error); + + if (!is_subvolume_inode(path.dentry->d_inode)) { + path_put(&path); + mntput(mnt); + error = -EINVAL; + printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", + subvol_name); + return ERR_PTR(-EINVAL); + } + + /* Get a ref to the sb and the dentry we found and return it */ + s = path.mnt->mnt_sb; + atomic_inc(&s->s_active); + root = dget(path.dentry); + path_put(&path); + down_write(&s->s_umount); + + return root; +} /* * Find a superblock for the given device / mount point. 
@@ -784,13 +907,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error) return ERR_PTR(error); + if (subvol_name) { + root = mount_subvol(subvol_name, flags, device_name, data); + kfree(subvol_name); + return root; + } + error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); if (error) - goto error_free_subvol_name; + return ERR_PTR(error); error = btrfs_open_devices(fs_devices, mode, fs_type); if (error) - goto error_free_subvol_name; + return ERR_PTR(error); if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { error = -EACCES; @@ -813,88 +942,57 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, fs_info->fs_devices = fs_devices; tree_root->fs_info = fs_info; + fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); + fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); + if (!fs_info->super_copy || !fs_info->super_for_commit) { + error = -ENOMEM; + goto error_close_devices; + } + bdev = fs_devices->latest_bdev; s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); - if (IS_ERR(s)) - goto error_s; + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto error_close_devices; + } if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { deactivate_locked_super(s); - error = -EBUSY; - goto error_close_devices; + return ERR_PTR(-EBUSY); } btrfs_close_devices(fs_devices); - kfree(fs_info); + free_fs_info(fs_info); kfree(tree_root); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + btrfs_sb(s)->fs_info->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 
1 : 0); if (error) { deactivate_locked_super(s); - goto error_free_subvol_name; + return ERR_PTR(error); } - btrfs_sb(s)->fs_info->bdev_holder = fs_type; s->s_flags |= MS_ACTIVE; } - /* if they gave us a subvolume name bind mount into that */ - if (strcmp(subvol_name, ".")) { - struct dentry *new_root; - - root = get_default_root(s, subvol_rootid); - if (IS_ERR(root)) { - error = PTR_ERR(root); - deactivate_locked_super(s); - goto error_free_subvol_name; - } - - mutex_lock(&root->d_inode->i_mutex); - new_root = lookup_one_len(subvol_name, root, - strlen(subvol_name)); - mutex_unlock(&root->d_inode->i_mutex); - - if (IS_ERR(new_root)) { - dput(root); - deactivate_locked_super(s); - error = PTR_ERR(new_root); - goto error_free_subvol_name; - } - if (!new_root->d_inode) { - dput(root); - dput(new_root); - deactivate_locked_super(s); - error = -ENXIO; - goto error_free_subvol_name; - } - dput(root); - root = new_root; - } else { - root = get_default_root(s, subvol_objectid); - if (IS_ERR(root)) { - error = PTR_ERR(root); - deactivate_locked_super(s); - goto error_free_subvol_name; - } + root = get_default_root(s, subvol_objectid); + if (IS_ERR(root)) { + deactivate_locked_super(s); + return root; } - kfree(subvol_name); return root; -error_s: - error = PTR_ERR(s); error_close_devices: btrfs_close_devices(fs_devices); - kfree(fs_info); + free_fs_info(fs_info); kfree(tree_root); -error_free_subvol_name: - kfree(subvol_name); return ERR_PTR(error); } @@ -919,7 +1017,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (root->fs_info->fs_devices->rw_devices == 0) return -EACCES; - if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) + if (btrfs_super_log_root(root->fs_info->super_copy) != 0) return -EINVAL; ret = btrfs_cleanup_fs_roots(root->fs_info); @@ -1085,7 +1183,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root 
*root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_super_block *disk_super = root->fs_info->super_copy; struct list_head *head = &root->fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e24b7964a15..29f782cc2cc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -275,7 +275,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, */ if (num_items > 0 && root != root->fs_info->chunk_root) { num_bytes = btrfs_calc_trans_metadata_size(root, num_items); - ret = btrfs_block_rsv_add(NULL, root, + ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, num_bytes); if (ret) @@ -418,8 +418,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - ret = btrfs_block_rsv_check(trans, root, - &root->fs_info->global_block_rsv, 0, 5); + + ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); return ret ? 1 : 0; } @@ -427,17 +427,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_block_rsv *rsv = trans->block_rsv; int updates; smp_mb(); if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; + /* + * We need to do this in case we're deleting csums so the global block + * rsv get's used instead of the csum block rsv. 
+ */ + trans->block_rsv = NULL; + updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) btrfs_run_delayed_refs(trans, root, updates); + trans->block_rsv = rsv; + return should_end_transaction(trans, root); } @@ -453,6 +462,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; while (count < 4) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; @@ -473,8 +484,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, count++; } - btrfs_trans_release_metadata(trans, root); - if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { trans->transaction->blocked = 1; @@ -562,50 +571,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { - int ret; int err = 0; int werr = 0; - struct page *page; - struct inode *btree_inode = root->fs_info->btree_inode; + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; u64 start = 0; u64 end; - unsigned long index; - while (1) { - ret = find_first_extent_bit(dirty_pages, start, &start, &end, - mark); - if (ret) - break; - while (start <= end) { - cond_resched(); - - index = start >> PAGE_CACHE_SHIFT; - start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_get_page(btree_inode->i_mapping, index); - if (!page) - continue; - - btree_lock_page_hook(page); - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - continue; - } - - if (PageWriteback(page)) { - if (PageDirty(page)) - wait_on_page_writeback(page); - else { - unlock_page(page); - page_cache_release(page); - continue; - } - } - err = write_one_page(page, 0); - if (err) - werr = err; - page_cache_release(page); - } + while (!find_first_extent_bit(dirty_pages, start, &start, &end, + mark)) { + 
convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, + GFP_NOFS); + err = filemap_fdatawrite_range(mapping, start, end); + if (err) + werr = err; + cond_resched(); + start = end + 1; } if (err) werr = err; @@ -621,39 +601,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root, int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { - int ret; int err = 0; int werr = 0; - struct page *page; - struct inode *btree_inode = root->fs_info->btree_inode; + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; u64 start = 0; u64 end; - unsigned long index; - while (1) { - ret = find_first_extent_bit(dirty_pages, start, &start, &end, - mark); - if (ret) - break; - - clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); - while (start <= end) { - index = start >> PAGE_CACHE_SHIFT; - start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_get_page(btree_inode->i_mapping, index); - if (!page) - continue; - if (PageDirty(page)) { - btree_lock_page_hook(page); - wait_on_page_writeback(page); - err = write_one_page(page, 0); - if (err) - werr = err; - } - wait_on_page_writeback(page); - page_cache_release(page); - cond_resched(); - } + while (!find_first_extent_bit(dirty_pages, start, &start, &end, + EXTENT_NEED_WAIT)) { + clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); + err = filemap_fdatawait_range(mapping, start, end); + if (err) + werr = err; + cond_resched(); + start = end + 1; } if (err) werr = err; @@ -673,7 +634,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, ret = btrfs_write_marked_extents(root, dirty_pages, mark); ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); - return ret || ret2; + + if (ret) + return ret; + if (ret2) + return ret2; + return 0; } int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, @@ -911,10 +877,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } 
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); - btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { - ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, + ret = btrfs_block_rsv_add(root, &pending->block_rsv, to_reserve); if (ret) { pending->error = ret; @@ -1002,7 +967,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, BUG_ON(IS_ERR(pending->snap)); btrfs_reloc_post_snapshot(trans, pending); - btrfs_orphan_post_snapshot(trans, pending); fail: kfree(new_root_item); trans->block_rsv = rsv; @@ -1032,7 +996,7 @@ static void update_super_roots(struct btrfs_root *root) struct btrfs_root_item *root_item; struct btrfs_super_block *super; - super = &root->fs_info->super_copy; + super = root->fs_info->super_copy; root_item = &root->fs_info->chunk_root->root_item; super->chunk_root = root_item->bytenr; @@ -1043,7 +1007,7 @@ static void update_super_roots(struct btrfs_root *root) super->root = root_item->bytenr; super->generation = root_item->generation; super->root_level = root_item->level; - if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) + if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; } @@ -1168,14 +1132,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_run_ordered_operations(root, 0); + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); - btrfs_trans_release_metadata(trans, root); - cur_trans = trans->transaction; /* * set the flushing flag so procs in this transaction have to @@ -1341,12 +1306,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, update_super_roots(root); if (!root->fs_info->log_root_recovering) { - btrfs_set_super_log_root(&root->fs_info->super_copy, 0); - 
btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root(root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); } - memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, + sizeof(*root->fs_info->super_copy)); trans->transaction->blocked = 0; spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 786639fca06..f4d81c06d48 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_pin_extent(log->fs_info->extent_root, - eb->start, eb->len, 0); + btrfs_pin_extent_for_log_replay(wc->trans, + log->fs_info->extent_root, + eb->start, eb->len); if (btrfs_buffer_uptodate(eb, gen)) { if (wc->write) @@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, bytenr, blocksize); BUG_ON(ret); } @@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, path->nodes[*level]->start, path->nodes[*level]->len); BUG_ON(ret); @@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(log, next->start, + ret = btrfs_free_and_pin_reserved_extent(log, next->start, next->len); BUG_ON(ret); } @@ -2013,10 +2014,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* wait for previous tree log sync to complete */ if 
(atomic_read(&root->log_commit[(index1 + 1) % 2])) wait_log_commit(trans, root, root->log_transid - 1); - while (1) { unsigned long batch = root->log_batch; - if (root->log_multiple_pids) { + /* when we're on an ssd, just kick the log commit out */ + if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&root->log_mutex); @@ -2117,9 +2118,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); - btrfs_set_super_log_root(&root->fs_info->super_for_commit, + btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); - btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_set_super_log_root_level(root->fs_info->super_for_commit, btrfs_header_level(log_root_tree->node)); log_root_tree->log_batch = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 18baac5a3f6..f8e2943101a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -366,6 +366,14 @@ static noinline int device_list_add(const char *path, } INIT_LIST_HEAD(&device->dev_alloc_list); + /* init readahead state */ + spin_lock_init(&device->reada_lock); + device->reada_curr_zone = NULL; + atomic_set(&device->reada_in_flight, 0); + device->reada_next = 0; + INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); + mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); mutex_unlock(&fs_devices->device_list_mutex); @@ -597,10 +605,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); - if (!bh) { - ret = -EINVAL; + if (!bh) goto error_close; - } disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -655,7 +661,7 @@ error: continue; } if 
(fs_devices->open_devices == 0) { - ret = -EIO; + ret = -EINVAL; goto out; } fs_devices->seeding = seeding; @@ -1013,8 +1019,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, } BUG_ON(ret); - if (device->bytes_used > 0) - device->bytes_used -= btrfs_dev_extent_length(leaf, extent); + if (device->bytes_used > 0) { + u64 len = btrfs_dev_extent_length(leaf, extent); + device->bytes_used -= len; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += len; + spin_unlock(&root->fs_info->free_chunk_lock); + } ret = btrfs_del_item(trans, root, path); out: @@ -1356,6 +1367,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_undo; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space = device->total_bytes - + device->bytes_used; + spin_unlock(&root->fs_info->free_chunk_lock); + device->in_fs_metadata = 0; btrfs_scrub_cancel_dev(root, device); @@ -1387,8 +1403,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) call_rcu(&device->rcu, free_device); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; - btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); + num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; + btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; @@ -1450,7 +1466,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_fs_devices *old_devices; struct btrfs_fs_devices *seed_devices; - struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_super_block *disk_super = root->fs_info->super_copy; struct btrfs_device *device; u64 super_flags; @@ -1691,15 +1707,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char 
*device_path) root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += device->total_bytes; + spin_unlock(&root->fs_info->free_chunk_lock); + if (!blk_queue_nonrot(bdev_get_queue(bdev))) root->fs_info->fs_devices->rotating = 1; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + btrfs_set_super_total_bytes(root->fs_info->super_copy, total_bytes + device->total_bytes); - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); - btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); + btrfs_set_super_num_devices(root->fs_info->super_copy, total_bytes + 1); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); @@ -1790,7 +1810,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size) { struct btrfs_super_block *super_copy = - &device->dev_root->fs_info->super_copy; + device->dev_root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; @@ -1849,7 +1869,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 chunk_offset) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; u8 *ptr; @@ -2175,7 +2195,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) bool retried = false; struct extent_buffer *l; struct btrfs_key key; - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = 
root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); u64 old_size = device->total_bytes; u64 diff = device->total_bytes - new_size; @@ -2192,8 +2212,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) lock_chunks(root); device->total_bytes = new_size; - if (device->writeable) + if (device->writeable) { device->fs_devices->total_rw_bytes -= diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space -= diff; + spin_unlock(&root->fs_info->free_chunk_lock); + } unlock_chunks(root); again: @@ -2257,6 +2281,9 @@ again: device->total_bytes = old_size; if (device->writeable) device->fs_devices->total_rw_bytes += diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += diff; + spin_unlock(&root->fs_info->free_chunk_lock); unlock_chunks(root); goto done; } @@ -2292,7 +2319,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct btrfs_disk_key disk_key; u32 array_size; u8 *ptr; @@ -2615,6 +2642,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, index++; } + spin_lock(&extent_root->fs_info->free_chunk_lock); + extent_root->fs_info->free_chunk_space -= (stripe_size * + map->num_stripes); + spin_unlock(&extent_root->fs_info->free_chunk_lock); + index = 0; stripe = &chunk->stripe; while (index < map->num_stripes) { @@ -3626,15 +3658,20 @@ static int read_one_dev(struct btrfs_root *root, fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->in_fs_metadata = 1; - if (device->writeable) + if (device->writeable) { device->fs_devices->total_rw_bytes += device->total_bytes; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += device->total_bytes - + 
device->bytes_used; + spin_unlock(&root->fs_info->free_chunk_lock); + } ret = 0; return ret; } int btrfs_read_sys_array(struct btrfs_root *root) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 71f4f3f6749..ab5b1c49f35 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -92,6 +92,14 @@ struct btrfs_device { struct btrfs_work work; struct rcu_head rcu; struct work_struct rcu_work; + + /* readahead state */ + spinlock_t reada_lock; + atomic_t reada_in_flight; + u64 reada_next; + struct reada_zone *reada_curr_zone; + struct radix_tree_root reada_zones; + struct radix_tree_root reada_extents; }; struct btrfs_fs_devices { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 69565e5fc6a..a76e41c04b7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -127,6 +127,17 @@ static int do_setxattr(struct btrfs_trans_handle *trans, again: ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), name, name_len, value, size); + /* + * If we're setting an xattr to a new value but the new value is say + * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting + * back from split_leaf. This is because it thinks we'll be extending + * the existing item size, but we're asking for enough space to add the + * item itself. So if we get EOVERFLOW just set ret to EEXIST and let + * the rest of the function figure it out. 
+ */ + if (ret == -EOVERFLOW) + ret = -EEXIST; + if (ret == -EEXIST) { if (flags & XATTR_CREATE) goto out; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index e76bfeb6826..30acd22147e 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -351,9 +351,7 @@ static int build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) { unsigned int dlen; - unsigned int wlen; - unsigned int size = 6 * sizeof(struct ntlmssp2_name); - __le64 curtime; + unsigned int size = 2 * sizeof(struct ntlmssp2_name); char *defdmname = "WORKGROUP"; unsigned char *blobptr; struct ntlmssp2_name *attrptr; @@ -365,15 +363,14 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) } dlen = strlen(ses->domainName); - wlen = strlen(ses->server->hostname); - /* The length of this blob is a size which is - * six times the size of a structure which holds name/size + - * two times the unicode length of a domain name + - * two times the unicode length of a server name + - * size of a timestamp (which is 8 bytes). 
+ /* + * The length of this blob is two times the size of a + * structure (av pair) which holds name/size + * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) + + * unicode length of a netbios domain name */ - ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; + ses->auth_key.len = size + 2 * dlen; ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { ses->auth_key.len = 0; @@ -384,44 +381,15 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) blobptr = ses->auth_key.response; attrptr = (struct ntlmssp2_name *) blobptr; + /* + * As defined in MS-NTLM 3.3.2, just this av pair field + * is sufficient as part of the temp + */ attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); attrptr->length = cpu_to_le16(2 * dlen); blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); - blobptr += 2 * dlen; - attrptr = (struct ntlmssp2_name *) blobptr; - - attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_COMPUTER_NAME); - attrptr->length = cpu_to_le16(2 * wlen); - blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); - cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); - - blobptr += 2 * wlen; - attrptr = (struct ntlmssp2_name *) blobptr; - - attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_DOMAIN_NAME); - attrptr->length = cpu_to_le16(2 * dlen); - blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); - cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); - - blobptr += 2 * dlen; - attrptr = (struct ntlmssp2_name *) blobptr; - - attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_COMPUTER_NAME); - attrptr->length = cpu_to_le16(2 * wlen); - blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); - cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); - - blobptr += 2 * wlen; - attrptr = (struct ntlmssp2_name *) blobptr; - - attrptr->type = 
cpu_to_le16(NTLMSSP_AV_TIMESTAMP); - attrptr->length = cpu_to_le16(sizeof(__le64)); - blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); - curtime = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - memcpy(blobptr, &curtime, sizeof(__le64)); - return 0; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f93eb948d07..54b8f1e7da9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -548,6 +548,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) struct inode *dir = dentry->d_inode; struct dentry *child; + if (!dir) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + break; + } + /* skip separators */ while (*s == sep) s++; @@ -563,10 +569,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) mutex_unlock(&dir->i_mutex); dput(dentry); dentry = child; - if (!dentry->d_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOENT); - } } while (!IS_ERR(dentry)); _FreeXid(xid); kfree(full_path); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index aac37d99a48..a80f7bd97b9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, T2_FNEXT_RSP_PARMS *parms; char *response_data; int rc = 0; - int bytes_returned, name_len; + int bytes_returned; + unsigned int name_len; __u16 params, byte_count; cFYI(1, "In FindNext"); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 633c246b677..71beb020197 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1298,7 +1298,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* ignore */ } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ - } else if (strnicmp(data, "rw", 2) == 0) { + } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) { /* ignore */ } else if (strnicmp(data, "ro", 2) == 0) { /* ignore */ @@ -1401,7 +1401,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->server_ino = 1; } else if (strnicmp(data, "noserverino", 9) == 0) { vol->server_ino 
= 0; - } else if (strnicmp(data, "rwpidforward", 4) == 0) { + } else if (strnicmp(data, "rwpidforward", 12) == 0) { vol->rwpidforward = 1; } else if (strnicmp(data, "cifsacl", 7) == 0) { vol->cifs_acl = 1; @@ -2018,7 +2018,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) warned_on_ntlm = true; cERROR(1, "default security mechanism requested. The default " "security mechanism will be upgraded from ntlm to " - "ntlmv2 in kernel release 3.1"); + "ntlmv2 in kernel release 3.2"); } ses->overrideSecFlg = volume_info->secFlg; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 04da6acde85..12661e1deed 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2807,7 +2807,7 @@ make_io: trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 5571708b6a5..0629e09f651 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -922,7 +922,8 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 18d2558b762..986e2388f03 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -647,7 +647,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) 
return bh; @@ -3298,7 +3298,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f8068c7bae9..1c924faeb6c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -922,7 +922,8 @@ restart: bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 85c62923ee2..59864643436 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) bh->b_end_io = end_buffer_write_sync; get_bh(bh); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - submit_bh(WRITE_SYNC | REQ_META, bh); + submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); else - submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 747238cd9f9..be29858900f 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | + int write_op = REQ_META | REQ_PRIO | (wbc->sync_mode == WB_SYNC_ALL ? 
WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); @@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ_SYNC | REQ_META, bh); + submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); if (!(flags & DIO_WAIT)) return 0; @@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3bc073a4cf8..079587e5384 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | REQ_META, bio); + submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 42e8d23bc04..0e8bb13381e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -709,7 +709,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c106ca22e81..d24a9b666a2 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -344,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; + u64 last_fs_block, last_fs_page; int err; err = -EINVAL; @@ -399,9 +400,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; - err = 
generic_check_addressable(sbi->alloc_blksz_shift, - sbi->total_blocks); - if (err) { + err = -EFBIG; + last_fs_block = sbi->total_blocks - 1; + last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> + PAGE_CACHE_SHIFT; + + if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || + (last_fs_page > (pgoff_t)(~0ULL))) { printk(KERN_ERR "hfs: filesystem size too large.\n"); goto out_free_vhdr; } @@ -525,8 +530,8 @@ out_close_cat_tree: out_close_ext_tree: hfs_btree_close(sbi->ext_tree); out_free_vhdr: - kfree(sbi->s_vhdr); - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_vhdr_buf); + kfree(sbi->s_backup_vhdr_buf); out_unload_nls: unload_nls(sbi->nls); unload_nls(nls); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 10e515a0d45..7daf4b852d1 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -272,9 +272,9 @@ reread: return 0; out_free_backup_vhdr: - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_backup_vhdr_buf); out_free_vhdr: - kfree(sbi->s_vhdr); + kfree(sbi->s_vhdr_buf); out: return error; } diff --git a/fs/namei.c b/fs/namei.c index b52bc685465..0b3138de2a3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -721,12 +721,6 @@ static int follow_automount(struct path *path, unsigned flags, if (!path->dentry->d_op || !path->dentry->d_op->d_automount) return -EREMOTE; - /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT - * and this is the terminal part of the path. - */ - if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) - return -EISDIR; /* we actually want to stop here */ - /* We don't want to mount if someone's just doing a stat - * unless they're stat'ing a directory and appended a '/' to * the name. @@ -739,7 +733,7 @@ static int follow_automount(struct path *path, unsigned flags, * of the daemon to instantiate them before they can be used. 
*/ if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE)) && + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && path->dentry->d_inode) return -EISDIR; @@ -2616,6 +2610,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (!dir->i_op->rmdir) return -EPERM; + dget(dentry); mutex_lock(&dentry->d_inode->i_mutex); error = -EBUSY; @@ -2636,6 +2631,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) out: mutex_unlock(&dentry->d_inode->i_mutex); + dput(dentry); if (!error) d_delete(dentry); return error; @@ -3025,6 +3021,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; + dget(new_dentry); if (target) mutex_lock(&target->i_mutex); @@ -3045,6 +3042,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, out: if (target) mutex_unlock(&target->i_mutex); + dput(new_dentry); if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry,new_dentry); diff --git a/fs/namespace.c b/fs/namespace.c index 22bfe8273c6..b4febb29d3b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1757,7 +1757,7 @@ static int do_loopback(struct path *path, char *old_name, return err; if (!old_name || !*old_name) return -EINVAL; - err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); + err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); if (err) return err; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1ec1a85fa71..3e93e9a1bee 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -56,6 +56,9 @@ enum nfs4_session_state { NFS4_SESSION_DRAINING, }; +#define NFS4_RENEW_TIMEOUT 0x01 +#define NFS4_RENEW_DELEGATION_CB 0x02 + struct nfs4_minor_version_ops { u32 minor_version; @@ -225,7 +228,7 @@ struct nfs4_state_recovery_ops { }; struct nfs4_state_maintenance_ops { - int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); struct 
rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; @@ -237,8 +240,6 @@ extern const struct inode_operations nfs4_dir_inode_operations; extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); -extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); @@ -349,6 +350,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); +extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8c77039e7a8..4700fae1ada 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3374,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! 
*/ - if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) + return; + if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { nfs4_schedule_lease_recovery(clp); - return; + return; + } + nfs4_schedule_path_down_recovery(clp); } do_renew_lease(clp, timestamp); } @@ -3386,7 +3390,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -3395,9 +3399,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) }; struct nfs4_renewdata *data; + if (renew_flags == 0) + return 0; if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; data->client = clp; @@ -3406,7 +3412,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) &nfs4_renew_ops, data); } -int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -5504,11 +5510,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return rpc_run_task(&task_setup_data); } -static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_task *task; int ret = 0; + if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) + return 0; task = _nfs41_proc_sequence(clp, cred); if (IS_ERR(task)) ret = PTR_ERR(task); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index df8e7f3ca56..dc484c0eae7 100644 --- 
a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) struct rpc_cred *cred; long lease; unsigned long last, now; + unsigned renew_flags = 0; ops = clp->cl_mvops->state_renewal_ops; dprintk("%s: start\n", __func__); @@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) last = clp->cl_last_renewal; now = jiffies; /* Are we close to a lease timeout? */ - if (time_after(now, last + lease/3)) { + if (time_after(now, last + lease/3)) + renew_flags |= NFS4_RENEW_TIMEOUT; + if (nfs_delegations_present(clp)) + renew_flags |= NFS4_RENEW_DELEGATION_CB; + + if (renew_flags != 0) { cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - if (!nfs_delegations_present(clp)) { + if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } nfs_expire_all_delegations(clp); } else { /* Queue an asynchronous RENEW. */ - ops->sched_state_renewal(clp, cred); + ops->sched_state_renewal(clp, cred, renew_flags); put_rpccred(cred); goto out_exp; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 72ab97ef3d6..39914be40b0 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +void nfs4_schedule_path_down_recovery(struct nfs_client *clp) +{ + nfs_handle_cb_pathdown(clp); + nfs4_schedule_state_manager(clp); +} + static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b961ceac66b..5b19b6aabe1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2035,9 +2035,6 @@ static inline void nfs_initialise_sb(struct super_block *sb) sb->s_blocksize = nfs_block_bits(server->wsize, &sb->s_blocksize_bits); - if (server->flags & NFS_MOUNT_NOAC) - sb->s_flags |= MS_SYNCHRONOUS; - sb->s_bdi = &server->backing_dev_info; nfs_super_set_maxbytes(sb, 
server->maxfilesize); @@ -2249,6 +2246,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; + /* -o noac implies -o sync */ + if (server->flags & NFS_MOUNT_NOAC) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { @@ -2361,6 +2362,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; + /* -o noac implies -o sync */ + if (server->flags & NFS_MOUNT_NOAC) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { @@ -2628,6 +2633,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; + /* -o noac implies -o sync */ + if (server->flags & NFS_MOUNT_NOAC) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { @@ -2789,7 +2798,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, goto out_put_mnt_ns; ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, - export_path, LOOKUP_FOLLOW, &path); + export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); nfs_referral_loop_unprotect(); put_mnt_ns(ns_private); @@ -2916,6 +2925,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; + /* -o noac implies -o sync */ + if (server->flags & NFS_MOUNT_NOAC) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(&nfs4_fs_type, compare_super, 
nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { @@ -3003,6 +3016,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; + /* -o noac implies -o sync */ + if (server->flags & NFS_MOUNT_NOAC) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b39b37f8091..c9bd2a6b7d4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -958,7 +958,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head if (!data) goto out_bad; data->pagevec[0] = page; - nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); list_add(&data->list, res); requests++; nbytes -= len; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 25b6a887adb..5afaa58a863 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -877,30 +877,54 @@ struct numa_maps_private { struct numa_maps md; }; -static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) +static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, + unsigned long nr_pages) { int count = page_mapcount(page); - md->pages++; + md->pages += nr_pages; if (pte_dirty || PageDirty(page)) - md->dirty++; + md->dirty += nr_pages; if (PageSwapCache(page)) - md->swapcache++; + md->swapcache += nr_pages; if (PageActive(page) || PageUnevictable(page)) - md->active++; + md->active += nr_pages; if (PageWriteback(page)) - md->writeback++; + md->writeback += nr_pages; if (PageAnon(page)) - md->anon++; + md->anon += nr_pages; if (count > md->mapcount_max) md->mapcount_max = count; - md->node[page_to_nid(page)]++; + md->node[page_to_nid(page)] += nr_pages; +} + +static struct page *can_gather_numa_stats(pte_t pte, struct 
vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pte_present(pte)) + return NULL; + + page = vm_normal_page(vma, addr, pte); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_HIGH_MEMORY])) + return NULL; + + return page; } static int gather_pte_stats(pmd_t *pmd, unsigned long addr, @@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *pte; md = walk->private; - orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - do { - struct page *page; - int nid; + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge(*pmd)) { + if (pmd_trans_splitting(*pmd)) { + spin_unlock(&walk->mm->page_table_lock); + wait_split_huge_page(md->vma->anon_vma, pmd); + } else { + pte_t huge_pte = *(pte_t *)pmd; + struct page *page; - if (!pte_present(*pte)) - continue; + page = can_gather_numa_stats(huge_pte, md->vma, addr); + if (page) + gather_stats(page, md, pte_dirty(huge_pte), + HPAGE_PMD_SIZE/PAGE_SIZE); + spin_unlock(&walk->mm->page_table_lock); + return 0; + } + } else { + spin_unlock(&walk->mm->page_table_lock); + } - page = vm_normal_page(md->vma, addr, *pte); + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + do { + struct page *page = can_gather_numa_stats(*pte, md->vma, addr); if (!page) continue; - - if (PageReserved(page)) - continue; - - nid = page_to_nid(page); - if (!node_isset(nid, node_states[N_HIGH_MEMORY])) - continue; - - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); @@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, return 0; md = walk->private; - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); return 0; } diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b34bdb25490..10b6be3ca28 
100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -355,7 +355,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, * resolution (think about autofs) and thus deadlocks could arise. */ if (cmds == Q_QUOTAON) { - ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); + ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); if (ret) pathp = ERR_PTR(ret); else diff --git a/fs/stat.c b/fs/stat.c index ba5316ffac6..78a3aa83c7e 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, if (!(flag & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - if (flag & AT_NO_AUTOMOUNT) - lookup_flags |= LOOKUP_NO_AUTOMOUNT; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 63e971e2b83..8c37dde4c52 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1300,6 +1300,7 @@ xfs_end_io_direct_write( bool is_async) { struct xfs_ioend *ioend = iocb->private; + struct inode *inode = ioend->io_inode; /* * blockdev_direct_IO can return an error even after the I/O @@ -1331,7 +1332,7 @@ xfs_end_io_direct_write( } /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(ioend->io_inode); + inode_dio_done(inode); } STATIC ssize_t diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index cac2ecfa674..ef43fce519a 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -629,7 +629,7 @@ xfs_buf_item_push( * the xfsbufd to get this buffer written. We have to unlock the buffer * to allow the xfsbufd to write it, too. 
*/ -STATIC void +STATIC bool xfs_buf_item_pushbuf( struct xfs_log_item *lip) { @@ -643,6 +643,7 @@ xfs_buf_item_pushbuf( xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); + return true; } STATIC void diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 9e0e2fa3f2c..bb3f71d236d 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait( * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ -STATIC void +STATIC bool xfs_qm_dquot_logitem_pushbuf( struct xfs_log_item *lip) { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + bool ret = true; ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf( if (completion_done(&dqp->q_flush) || !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_dqunlock(dqp); - return; + return true; } bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); xfs_dqunlock(dqp); if (!bp) - return; + return true; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); + if (xfs_buf_ispinned(bp)) + ret = false; xfs_buf_relse(bp); + return ret; } /* diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 588406dc6a3..836ad80d4f2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -708,13 +708,14 @@ xfs_inode_item_committed( * marked delayed write. If that's the case, we'll promote it and that will * allow the caller to write the buffer by triggering the xfsbufd to run. 
*/ -STATIC void +STATIC bool xfs_inode_item_pushbuf( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp; + bool ret = true; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); @@ -725,7 +726,7 @@ xfs_inode_item_pushbuf( if (completion_done(&ip->i_flush) || !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); - return; + return true; } bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, @@ -733,10 +734,13 @@ xfs_inode_item_pushbuf( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!bp) - return; + return true; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); + if (xfs_buf_ispinned(bp)) + ret = false; xfs_buf_relse(bp); + return ret; } /* diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 1e8a45e74c3..828662f70d6 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -68,6 +68,8 @@ #include <linux/ctype.h> #include <linux/writeback.h> #include <linux/capability.h> +#include <linux/kthread.h> +#include <linux/freezer.h> #include <linux/list_sort.h> #include <asm/page.h> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2366c54cc4f..5cf06b85fd9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1652,24 +1652,13 @@ xfs_init_workqueues(void) */ xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); if (!xfs_syncd_wq) - goto out; - - xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); - if (!xfs_ail_wq) - goto out_destroy_syncd; - + return -ENOMEM; return 0; - -out_destroy_syncd: - destroy_workqueue(xfs_syncd_wq); -out: - return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { - destroy_workqueue(xfs_ail_wq); destroy_workqueue(xfs_syncd_wq); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 06a9759b635..53597f4db9b 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -350,7 +350,7 @@ typedef struct xfs_item_ops { void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t 
(*iop_committed)(xfs_log_item_t *, xfs_lsn_t); void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); + bool (*iop_pushbuf)(xfs_log_item_t *); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); } xfs_item_ops_t; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index c15aa29fa16..3a1e7ca54c2 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,8 +28,6 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ - #ifdef DEBUG /* * Check that the list is sorted as it should be. @@ -356,16 +354,10 @@ xfs_ail_delete( xfs_trans_ail_cursor_clear(ailp, lip); } -/* - * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself - * to run at a later time if there is more work to do to complete the push. - */ -STATIC void -xfs_ail_worker( - struct work_struct *work) +static long +xfsaild_push( + struct xfs_ail *ailp) { - struct xfs_ail *ailp = container_of(to_delayed_work(work), - struct xfs_ail, xa_work); xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor cur; xfs_log_item_t *lip; @@ -427,8 +419,13 @@ xfs_ail_worker( case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); - IOP_PUSHBUF(lip); - ailp->xa_last_pushed_lsn = lsn; + + if (!IOP_PUSHBUF(lip)) { + stuck++; + flush_log = 1; + } else { + ailp->xa_last_pushed_lsn = lsn; + } push_xfsbufd = 1; break; @@ -440,7 +437,6 @@ xfs_ail_worker( case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); - ailp->xa_last_pushed_lsn = lsn; stuck++; break; @@ -501,20 +497,6 @@ out_done: /* We're past our target or empty, so idle */ ailp->xa_last_pushed_lsn = 0; - /* - * We clear the XFS_AIL_PUSHING_BIT first before checking - * whether the target has changed. If the target has changed, - * this pushes the requeue race directly onto the result of the - * atomic test/set bit, so we are guaranteed that either the - * the pusher that changed the target or ourselves will requeue - * the work (but not both). 
- */ - clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); - smp_rmb(); - if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || - test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) - return; - tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* @@ -537,9 +519,30 @@ out_done: tout = 20; } - /* There is more to do, requeue us. */ - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, - msecs_to_jiffies(tout)); + return tout; +} + +static int +xfsaild( + void *data) +{ + struct xfs_ail *ailp = data; + long tout = 0; /* milliseconds */ + + while (!kthread_should_stop()) { + if (tout && tout <= 20) + __set_current_state(TASK_KILLABLE); + else + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); + + try_to_freeze(); + + tout = xfsaild_push(ailp); + } + + return 0; } /* @@ -574,8 +577,9 @@ xfs_ail_push( */ smp_wmb(); xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); - if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); + smp_wmb(); + + wake_up_process(ailp->xa_task); } /* @@ -813,9 +817,18 @@ xfs_trans_ail_init( INIT_LIST_HEAD(&ailp->xa_ail); INIT_LIST_HEAD(&ailp->xa_cursors); spin_lock_init(&ailp->xa_lock); - INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); + + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", + ailp->xa_mount->m_fsname); + if (IS_ERR(ailp->xa_task)) + goto out_free_ailp; + mp->m_ail = ailp; return 0; + +out_free_ailp: + kmem_free(ailp); + return ENOMEM; } void @@ -824,6 +837,6 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - cancel_delayed_work_sync(&ailp->xa_work); + kthread_stop(ailp->xa_task); kmem_free(ailp); } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 212946b9723..22750b5e4a8 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -64,23 +64,17 @@ struct xfs_ail_cursor { */ struct xfs_ail { struct xfs_mount *xa_mount; + struct task_struct 
*xa_task; struct list_head xa_ail; xfs_lsn_t xa_target; struct list_head xa_cursors; spinlock_t xa_lock; - struct delayed_work xa_work; xfs_lsn_t xa_last_pushed_lsn; - unsigned long xa_flags; }; -#define XFS_AIL_PUSHING_BIT 0 - /* * From xfs_trans_ail.c */ - -extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ - void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 98999cf107c..feb91219674 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -63,15 +63,10 @@ static inline struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) return container_of(gc, struct bgpio_chip, gc); } -int __devexit bgpio_remove(struct bgpio_chip *bgc); -int __devinit bgpio_init(struct bgpio_chip *bgc, - struct device *dev, - unsigned long sz, - void __iomem *dat, - void __iomem *set, - void __iomem *clr, - void __iomem *dirout, - void __iomem *dirin, - bool big_endian); +int bgpio_remove(struct bgpio_chip *bgc); +int bgpio_init(struct bgpio_chip *bgc, struct device *dev, + unsigned long sz, void __iomem *dat, void __iomem *set, + void __iomem *clr, void __iomem *dirout, void __iomem *dirin, + bool big_endian); #endif /* __BASIC_MMIO_GPIO_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32f0076e844..71fc53bb8f1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -124,6 +124,7 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ @@ -161,14 +162,15 @@ enum rq_flag_bits { #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1 << __REQ_SYNC) #define REQ_META (1 << __REQ_META) +#define REQ_PRIO (1 << __REQ_PRIO) 
#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b15d54f8c..7fbaa910334 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -873,7 +873,6 @@ struct blk_plug { struct list_head list; struct list_head cb_list; unsigned int should_sort; - unsigned int count; }; #define BLK_MAX_REQUEST_COUNT 16 diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3fa1f3d90ce..99e3e50b5c5 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -197,6 +197,11 @@ struct dm_target { * whether or not its underlying devices have support. */ unsigned discards_supported:1; + + /* + * Set if this target does not return zeroes on discarded blocks. 
+ */ + unsigned discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ diff --git a/include/linux/fs.h b/include/linux/fs.h index c2bd68f2277..277f497923a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -162,10 +162,8 @@ struct inodes_stat_t { #define READA RWA_MASK #define READ_SYNC (READ | REQ_SYNC) -#define READ_META (READ | REQ_META) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_META (WRITE | REQ_META) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e807ad687a0..3ad553e8eae 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -80,6 +80,7 @@ extern void irq_domain_del(struct irq_domain *domain); #endif /* CONFIG_IRQ_DOMAIN */ #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ) +extern struct irq_domain_ops irq_domain_simple_ops; extern void irq_domain_add_simple(struct device_node *controller, int irq_base); extern void irq_domain_generate_simple(const struct of_device_id *match, u64 phys_base, unsigned int irq_start); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2c366b52f50..aace6b8691a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -553,6 +553,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SPAPR_TCE 63 #define KVM_CAP_PPC_SMT 64 #define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_S390_GMAP 71 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3b535db00a9..343bd7661f2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active, int file); -struct memcg_scanrecord { - struct mem_cgroup *mem; 
/* scanend memory cgroup */ - struct mem_cgroup *root; /* scan target hierarchy root */ - int context; /* scanning context (see memcontrol.c) */ - unsigned long nr_scanned[2]; /* the number of scanned pages */ - unsigned long nr_rotated[2]; /* the number of rotated pages */ - unsigned long nr_freed[2]; /* the number of freed pages */ - unsigned long elapsed; /* nsec of time elapsed while scanning */ -}; - #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All "charge" functions with gfp_mask should use GFP_KERNEL or @@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); -extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct memcg_scanrecord *rec); -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct zone *zone, - struct memcg_scanrecord *rec, - unsigned long *nr_scanned); - #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index d12f8d635a8..97cf4f27d64 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -26,7 +26,7 @@ struct wm8994_ldo_pdata { struct regulator_init_data *init_data; }; -#define WM8994_CONFIGURE_GPIO 0x8000 +#define WM8994_CONFIGURE_GPIO 0x10000 #define WM8994_DRC_REGS 5 #define WM8994_EQ_REGS 20 diff --git a/include/linux/namei.h b/include/linux/namei.h index 76fe2c62ae7..409328d1cbb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -48,11 +48,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 +#define LOOKUP_AUTOMOUNT 0x0004 #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 -#define LOOKUP_NO_AUTOMOUNT 0x0080 + /* * Intent data */ diff --git a/include/linux/pci.h 
b/include/linux/pci.h index 8c230cbcbb4..9fc01226055 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -621,8 +621,9 @@ struct pci_driver { extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); enum pcie_bus_config_types { - PCIE_BUS_PERFORMANCE, + PCIE_BUS_TUNE_OFF, PCIE_BUS_SAFE, + PCIE_BUS_PERFORMANCE, PCIE_BUS_PEER2PEER, }; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index e07e2742a86..1dc420ba213 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -51,6 +51,7 @@ #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) #define PTP_EV_PORT 319 +#define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_ETYPE 12 #define OFF_IHL 14 @@ -116,14 +117,20 @@ static inline int ptp_filter_init(struct sock_filter *f, int len) {OP_OR, 0, 0, PTP_CLASS_IPV6 }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ /*L3x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L40*/ {OP_JEQ, 0, 6, ETH_P_8021Q }, /* f goto L50 */ \ +/*L40*/ {OP_JEQ, 0, 9, ETH_P_8021Q }, /* f goto L50 */ \ {OP_LDH, 0, 0, OFF_ETYPE + 4 }, /* */ \ - {OP_JEQ, 0, 9, ETH_P_1588 }, /* f goto L60 */ \ + {OP_JEQ, 0, 15, ETH_P_1588 }, /* f goto L60 */ \ + {OP_LDB, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 12, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_VLAN }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ -/*L50*/ {OP_JEQ, 0, 4, ETH_P_1588 }, /* f goto L61 */ \ +/*L50*/ {OP_JEQ, 0, 7, ETH_P_1588 }, /* f goto L61 */ \ + {OP_LDB, 0, 0, ETH_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 4, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_L2 }, /* */ \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0..41d0237fd44 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -1956,7 +1956,6 @@ static inline void disable_sched_clock_irqtime(void) {} extern unsigned long long task_sched_runtime(struct task_struct *task); -extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b996ed86d5..8bd383caa36 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 12b2b18e50c..e16557a357e 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -231,6 +231,8 @@ enum LINUX_MIB_TCPDEFERACCEPTDROP, LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */ LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ + LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ + LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ __LINUX_MIB_MAX }; diff --git a/include/linux/swap.h b/include/linux/swap.h index 14d62490922..c71f84bb62e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, int mode, int file); +extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + struct 
zone *zone, + unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); diff --git a/include/net/flow.h b/include/net/flow.h index 78113daadd6..a09447749e2 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -7,6 +7,7 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H +#include <linux/socket.h> #include <linux/in6.h> #include <linux/atomic.h> @@ -68,7 +69,7 @@ struct flowi4 { #define fl4_ipsec_spi uli.spi #define fl4_mh_type uli.mht.type #define fl4_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, @@ -112,7 +113,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { struct flowi_common __fl_common; @@ -127,7 +128,7 @@ struct flowidn { union flowi_uli uli; #define fld_sport uli.ports.sport #define fld_dport uli.ports.dport -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowi { union { @@ -161,6 +162,24 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) return container_of(fldn, struct flowi, u.dn); } +typedef unsigned long flow_compare_t; + +static inline size_t flow_key_size(u16 family) +{ + switch (family) { + case AF_INET: + BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t)); + return sizeof(struct flowi4) / sizeof(flow_compare_t); + case AF_INET6: + BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t)); + return sizeof(struct flowi6) / sizeof(flow_compare_t); + case AF_DECnet: + BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t)); + return sizeof(struct flowidn) / sizeof(flow_compare_t); + } + return 0; +} + #define FLOW_DIR_IN 0 #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 diff --git a/include/net/ip_vs.h 
b/include/net/ip_vs.h index 1aaf915656f..8fa4430f99c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -900,6 +900,7 @@ struct netns_ipvs { volatile int sync_state; volatile int master_syncid; volatile int backup_syncid; + struct mutex sync_mutex; /* multicast interface name */ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 99e6e19b57c..4c0766e201e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog; */ struct listen_sock { u8 max_qlen_log; - /* 3 bytes hole, try to use */ + u8 synflood_warned; + /* 2 bytes hole, try to use */ int qlen; int qlen_young; int clock_hand; diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6506458ccd3..712b3bebeda 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -109,6 +109,7 @@ typedef enum { SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ + SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/include/net/tcp.h b/include/net/tcp.h index 149a415d1e0..acc620a4a45 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -431,17 +431,34 @@ extern int tcp_disconnect(struct sock *sk, int flags); extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); +#ifdef CONFIG_SYN_COOKIES extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); +#else +static inline __u32 cookie_v4_init_sequence(struct sock *sk, + struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif extern __u32 cookie_init_timestamp(struct request_sock *req); extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *); 
/* From net/ipv6/syncookies.c */ extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SYN_COOKIES extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); - +#else +static inline __u32 cookie_v6_init_sequence(struct sock *sk, + struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif /* tcp_output.c */ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, @@ -460,6 +477,9 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); +extern int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 5271a741c3a..498433dd067 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -39,6 +39,7 @@ extern int datagram_recv_ctl(struct sock *sk, struct sk_buff *skb); extern int datagram_send_ctl(struct net *net, + struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, diff --git a/include/net/udplite.h b/include/net/udplite.h index 673a024c6b2..5f097ca7d5c 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,40 +66,34 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) +/* Slow-path computation of checksum. Socket is locked. 
*/ +static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { + const struct udp_sock *up = udp_sk(skb->sk); int cscov = up->len; + __wsum csum = 0; - /* - * Sender has set `partial coverage' option on UDP-Lite socket - */ - if (up->pcflag & UDPLITE_SEND_CC) { + if (up->pcflag & UDPLITE_SEND_CC) { + /* + * Sender has set `partial coverage' option on UDP-Lite socket. + * The special case "up->pcslen == 0" signifies full coverage. + */ if (up->pcslen < up->len) { - /* up->pcslen == 0 means that full coverage is required, - * partial coverage only if 0 < up->pcslen < up->len */ - if (0 < up->pcslen) { - cscov = up->pcslen; - } - uh->len = htons(up->pcslen); + if (0 < up->pcslen) + cscov = up->pcslen; + udp_hdr(skb)->len = htons(up->pcslen); } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ + /* + * NOTE: Causes for the error case `up->pcslen > up->len': + * (i) Application error (will not be penalized). + * (ii) Payload too big for send buffer: data is split + * into several packets, each with its own header. + * In this case (e.g. last segment), coverage may + * exceed packet length. + * Since packets with coverage length > packet length are + * illegal, we fall back to the defaults here. 
+ */ } - return cscov; -} - -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); - __wsum csum = 0; skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -115,16 +109,21 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) return csum; } +/* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - struct sock *sk = skb->sk; - int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); + const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); - const int len = skb->len - off; + int len = skb->len - off; + if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { + if (0 < up->pcslen) + len = up->pcslen; + udp_hdr(skb)->len = htons(up->pcslen); + } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - return skb_checksum(skb, off, min(cscov, len), 0); + return skb_checksum(skb, off, len, 0); } extern void udplite4_register(void); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6bca4cc0063..5f172703eb4 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __array(char, name, 32) __field(unsigned long, ino) __field(unsigned long, state) - __field(unsigned long, age) + __field(unsigned long, dirtied_when) __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) @@ -309,19 +309,19 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, dev_name(inode->i_mapping->backing_dev_info->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; - __entry->age = (jiffies - inode->dirtied_when) * - 1000 / HZ; + __entry->dirtied_when = inode->dirtied_when; __entry->writeback_index = inode->i_mapping->writeback_index; 
__entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; ), - TP_printk("bdi %s: ino=%lu state=%s age=%lu " + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " "index=%lu to_write=%ld wrote=%lu", __entry->name, __entry->ino, show_inode_state(__entry->state), - __entry->age, + __entry->dirtied_when, + (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, __entry->wrote diff --git a/init/main.c b/init/main.c index 9c51ee7adf3..03b408dff82 100644 --- a/init/main.c +++ b/init/main.c @@ -209,8 +209,19 @@ early_param("quiet", quiet_kernel); static int __init loglevel(char *str) { - get_option(&str, &console_loglevel); - return 0; + int newlevel; + + /* + * Only update loglevel value when a correct setting was passed, + * to prevent blind crashes (when loglevel being set to 0) that + * are quite hard to debug + */ + if (get_option(&str, &newlevel)) { + console_loglevel = newlevel; + return 0; + } + + return -EINVAL; } early_param("loglevel", loglevel); @@ -370,9 +381,6 @@ static noinline void __init_refok rest_init(void) preempt_enable_no_resched(); schedule(); - /* At this point, we can enable user mode helper functionality */ - usermodehelper_enable(); - /* Call into cpu_idle with preempt disabled */ preempt_disable(); cpu_idle(); @@ -722,6 +730,7 @@ static void __init do_basic_setup(void) driver_init(); init_irq_proc(); do_ctors(); + usermodehelper_enable(); do_initcalls(); } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d5a3009da71..dc5114b4c16 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc) desc->depth = 1; if (desc->irq_data.chip->irq_shutdown) desc->irq_data.chip->irq_shutdown(&desc->irq_data); - if (desc->irq_data.chip->irq_disable) + else if (desc->irq_data.chip->irq_disable) desc->irq_data.chip->irq_disable(&desc->irq_data); else desc->irq_data.chip->irq_mask(&desc->irq_data); diff --git 
a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d5828da3fd3..b57a3776de4 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -29,7 +29,11 @@ void irq_domain_add(struct irq_domain *domain) */ for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); - if (d || d->domain) { + if (!d) { + WARN(1, "error: assigning domain to non existant irq_desc"); + return; + } + if (d->domain) { /* things are broken; just report, don't clean up */ WARN(1, "error: irq_desc already assigned to a domain"); return; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 58f405b581e..640ded8f5c4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) do { times->utime = cputime_add(times->utime, t->utime); times->stime = cputime_add(times->stime, t->stime); - times->sum_exec_runtime += t->se.sum_exec_runtime; + times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: rcu_read_unlock(); @@ -274,9 +274,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) struct task_cputime sum; unsigned long flags; - spin_lock_irqsave(&cputimer->lock, flags); if (!cputimer->running) { - cputimer->running = 1; /* * The POSIX timer interface allows for absolute time expiry * values through the TIMER_ABSTIME flag, therefore we have @@ -284,8 +282,11 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) * it. 
*/ thread_group_cputime(tsk, &sum); + spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 1; update_gt_cputime(&cputimer->cputime, &sum); - } + } else + spin_lock_irqsave(&cputimer->lock, flags); *times = cputimer->cputime; spin_unlock_irqrestore(&cputimer->lock, flags); } @@ -312,7 +313,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock, cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = thread_group_sched_runtime(p); + thread_group_cputime(p, &cputime); + cpu->sched = cputime.sum_exec_runtime; break; } return 0; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9de3ecfd20f..a70d2a5d8c7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request, break; si = child->last_siginfo; - if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP)) - break; - - child->jobctl |= JOBCTL_LISTENING; - - /* - * If NOTIFY is set, it means event happened between start - * of this trap and now. Trigger re-trap immediately. - */ - if (child->jobctl & JOBCTL_TRAP_NOTIFY) - signal_wake_up(child, true); - + if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) { + child->jobctl |= JOBCTL_LISTENING; + /* + * If NOTIFY is set, it means event happened between + * start of this trap and now. Trigger re-trap. + */ + if (child->jobctl & JOBCTL_TRAP_NOTIFY) + signal_wake_up(child, true); + ret = 0; + } unlock_task_sighand(child, &flags); - ret = 0; break; case PTRACE_DETACH: /* detach a process that was attached. 
*/ diff --git a/kernel/resource.c b/kernel/resource.c index 3b3cedc5259..c8dc249da5c 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old, else tmp.end = root->end; + if (tmp.end < tmp.start) + goto next; + resource_clip(&tmp, constraint->min, constraint->max); arch_remove_reservations(&tmp); @@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old, return 0; } } - if (!this) + +next: if (!this || this->end == root->end) break; + if (this != old) tmp.start = this->end + 1; this = this->sibling; diff --git a/kernel/sched.c b/kernel/sched.c index ec5f472bc5b..b50b0f0c9aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3725,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) } /* - * Return sum_exec_runtime for the thread group. - * In case the task is currently running, return the sum plus current's - * pending runtime that have not been accounted yet. - * - * Note that the thread group might have other running tasks as well, - * so the return value not includes other pending runtime that other - * running tasks might have. - */ -unsigned long long thread_group_sched_runtime(struct task_struct *p) -{ - struct task_cputime totals; - unsigned long flags; - struct rq *rq; - u64 ns; - - rq = task_rq_lock(p, &flags); - thread_group_cputime(p, &totals); - ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - -/* * Account user cpu time to a process. 
* @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update @@ -4372,7 +4348,7 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } -asmlinkage void schedule(void) +asmlinkage void __sched schedule(void) { struct task_struct *tsk = current; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 97540f0c9e4..af1177858be 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1050,7 +1050,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) */ if (curr && unlikely(rt_task(curr)) && (curr->rt.nr_cpus_allowed < 2 || - curr->prio < p->prio) && + curr->prio <= p->prio) && (p->rt.nr_cpus_allowed > 1)) { int target = find_lowest_rq(p); @@ -1581,7 +1581,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) p->rt.nr_cpus_allowed > 1 && rt_task(rq->curr) && (rq->curr->rt.nr_cpus_allowed < 2 || - rq->curr->prio < p->prio)) + rq->curr->prio <= p->prio)) push_rt_tasks(rq); } diff --git a/kernel/sys.c b/kernel/sys.c index 18ee1d2f647..1dbbe695a5e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1172,7 +1172,7 @@ DECLARE_RWSEM(uts_sem); static int override_release(char __user *release, int len) { int ret = 0; - char buf[len]; + char buf[65]; if (current->personality & UNAME26) { char *rest = UTS_RELEASE; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e19ce1454ee..e66046456f4 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = { .cmd = TASKSTATS_CMD_GET, .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, + .flags = GENL_ADMIN_PERM, }; static struct genl_ops cgroupstats_ops = { diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 24dc60d9fa1..5bbfac85866 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) #define KB 1024 #define MB (1024*KB) +#define KB_MASK (~(KB-1)) /* 
* fill in extended accounting fields */ @@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } - stats->read_char = p->ioac.rchar; - stats->write_char = p->ioac.wchar; - stats->read_syscalls = p->ioac.syscr; - stats->write_syscalls = p->ioac.syscw; + stats->read_char = p->ioac.rchar & KB_MASK; + stats->write_char = p->ioac.wchar & KB_MASK; + stats->read_syscalls = p->ioac.syscr & KB_MASK; + stats->write_syscalls = p->ioac.syscw & KB_MASK; #ifdef CONFIG_TASK_IO_ACCOUNTING - stats->read_bytes = p->ioac.read_bytes; - stats->write_bytes = p->ioac.write_bytes; - stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; + stats->read_bytes = p->ioac.read_bytes & KB_MASK; + stats->write_bytes = p->ioac.write_bytes & KB_MASK; + stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK; #else stats->read_bytes = 0; stats->write_bytes = 0; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25fb1b0e53f..1783aabc612 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2412,8 +2412,13 @@ reflush: for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + bool drained; - if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + spin_lock_irq(&cwq->gcwq->lock); + drained = !cwq->nr_active && list_empty(&cwq->delayed_works); + spin_unlock_irq(&cwq->gcwq->lock); + + if (drained) continue; if (++flush_cnt == 10 || diff --git a/lib/sha1.c b/lib/sha1.c index f33271dd00c..1de509a159c 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bitops.h> +#include <linux/cryptohash.h> #include <asm/unaligned.h> /* diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index e51e2558ca9..a768e6d28bb 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -441,8 +441,12 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, * next filter in the chain. 
Apply the BCJ filter on the new data * in the output buffer. If everything cannot be filtered, copy it * to temp and rewind the output buffer position accordingly. + * + * This needs to be always run when temp.size == 0 to handle a special + * case where the output buffer is full and the next filter has no + * more output coming but hasn't returned XZ_STREAM_END yet. */ - if (s->temp.size < b->out_size - b->out_pos) { + if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) { out_start = b->out_pos; memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); b->out_pos += s->temp.size; @@ -465,16 +469,25 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, s->temp.size = b->out_pos - out_start; b->out_pos -= s->temp.size; memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); + + /* + * If there wasn't enough input to the next filter to fill + * the output buffer with unfiltered data, there's no point + * to try decoding more data to temp. + */ + if (b->out_pos + s->temp.size < b->out_size) + return XZ_OK; } /* - * If we have unfiltered data in temp, try to fill by decoding more - * data from the next filter. Apply the BCJ filter on temp. Then we - * hopefully can fill the actual output buffer by copying filtered - * data from temp. A mix of filtered and unfiltered data may be left - * in temp; it will be taken care on the next call to this function. + * We have unfiltered data in temp. If the output buffer isn't full + * yet, try to fill the temp buffer by decoding more data from the + * next filter. Apply the BCJ filter on temp. Then we hopefully can + * fill the actual output buffer by copying filtered data from temp. + * A mix of filtered and unfiltered data may be left in temp; it will + * be taken care on the next call to this function. */ - if (s->temp.size > 0) { + if (b->out_pos < b->out_size) { /* Make b->out{,_pos,_size} temporarily point to s->temp. 
*/ s->out = b->out; s->out_pos = b->out_pos; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d6edf8d14f9..a87da524a4a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -359,6 +359,17 @@ static unsigned long bdi_longest_inactive(void) return max(5UL * 60 * HZ, interval); } +/* + * Clear pending bit and wakeup anybody waiting for flusher thread creation or + * shutdown + */ +static void bdi_clear_pending(struct backing_dev_info *bdi) +{ + clear_bit(BDI_pending, &bdi->state); + smp_mb__after_clear_bit(); + wake_up_bit(&bdi->state, BDI_pending); +} + static int bdi_forker_thread(void *ptr) { struct bdi_writeback *me = ptr; @@ -390,6 +401,13 @@ static int bdi_forker_thread(void *ptr) } spin_lock_bh(&bdi_lock); + /* + * In the following loop we are going to check whether we have + * some work to do without any synchronization with tasks + * waking us up to do work for them. So we have to set task + * state already here so that we don't miss wakeups coming + * after we verify some condition. + */ set_current_state(TASK_INTERRUPTIBLE); list_for_each_entry(bdi, &bdi_list, bdi_list) { @@ -469,11 +487,13 @@ static int bdi_forker_thread(void *ptr) spin_unlock_bh(&bdi->wb_lock); wake_up_process(task); } + bdi_clear_pending(bdi); break; case KILL_THREAD: __set_current_state(TASK_RUNNING); kthread_stop(task); + bdi_clear_pending(bdi); break; case NO_ACTION: @@ -489,16 +509,8 @@ static int bdi_forker_thread(void *ptr) else schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); try_to_freeze(); - /* Back to the main loop */ - continue; + break; } - - /* - * Clear pending bit and wakeup anybody waiting to tear us down. 
- */ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); } return 0; diff --git a/mm/filemap.c b/mm/filemap.c index 645a080ba4d..7771871fa35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -827,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, { unsigned int i; unsigned int ret; - unsigned int nr_found; + unsigned int nr_found, nr_skip; rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, (void ***)pages, NULL, start, nr_pages); ret = 0; + nr_skip = 0; for (i = 0; i < nr_found; i++) { struct page *page; repeat: @@ -856,6 +857,7 @@ repeat: * here as an exceptional entry: so skip over it - * we only reach this from invalidate_mapping_pages(). */ + nr_skip++; continue; } @@ -876,7 +878,7 @@ repeat: * If all entries were removed before we could secure them, * try again, because callers stop trying once 0 is returned. */ - if (unlikely(!ret && nr_found)) + if (unlikely(!ret && nr_found > nr_skip)) goto restart; rcu_read_unlock(); return ret; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ebd1e86bef1..3508777837c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list { static void mem_cgroup_threshold(struct mem_cgroup *mem); static void mem_cgroup_oom_notify(struct mem_cgroup *mem); -enum { - SCAN_BY_LIMIT, - SCAN_BY_SYSTEM, - NR_SCAN_CONTEXT, - SCAN_BY_SHRINK, /* not recorded now */ -}; - -enum { - SCAN, - SCAN_ANON, - SCAN_FILE, - ROTATE, - ROTATE_ANON, - ROTATE_FILE, - FREED, - FREED_ANON, - FREED_FILE, - ELAPSED, - NR_SCANSTATS, -}; - -struct scanstat { - spinlock_t lock; - unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; - unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; -}; - -const char *scanstat_string[NR_SCANSTATS] = { - "scanned_pages", - "scanned_anon_pages", - "scanned_file_pages", - "rotated_pages", - "rotated_anon_pages", - "rotated_file_pages", - "freed_pages", - 
"freed_anon_pages", - "freed_file_pages", - "elapsed_ns", -}; -#define SCANSTAT_WORD_LIMIT "_by_limit" -#define SCANSTAT_WORD_SYSTEM "_by_system" -#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" - - /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide @@ -313,8 +269,7 @@ struct mem_cgroup { /* For oom notifier event fd */ struct list_head oom_notify; - /* For recording LRU-scan statistics */ - struct scanstat scanstat; + /* * Should we move charges of a task when a task is moved into this * mem_cgroup ? And what type of charges should we move ? @@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) } #endif -static void __mem_cgroup_record_scanstat(unsigned long *stats, - struct memcg_scanrecord *rec) -{ - - stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; - stats[SCAN_ANON] += rec->nr_scanned[0]; - stats[SCAN_FILE] += rec->nr_scanned[1]; - - stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; - stats[ROTATE_ANON] += rec->nr_rotated[0]; - stats[ROTATE_FILE] += rec->nr_rotated[1]; - - stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; - stats[FREED_ANON] += rec->nr_freed[0]; - stats[FREED_FILE] += rec->nr_freed[1]; - - stats[ELAPSED] += rec->elapsed; -} - -static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) -{ - struct mem_cgroup *mem; - int context = rec->context; - - if (context >= NR_SCAN_CONTEXT) - return; - - mem = rec->mem; - spin_lock(&mem->scanstat.lock); - __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); - spin_unlock(&mem->scanstat.lock); - - mem = rec->root; - spin_lock(&mem->scanstat.lock); - __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); - spin_unlock(&mem->scanstat.lock); -} - /* * Scan the hierarchy if needed to reclaim memory. 
We remember the last child * we reclaimed from, so that we don't end up penalizing one child extensively @@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; - struct memcg_scanrecord rec; unsigned long excess; - unsigned long scanned; + unsigned long nr_scanned; excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; @@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (!check_soft && !shrink && root_mem->memsw_is_minimum) noswap = true; - if (shrink) - rec.context = SCAN_BY_SHRINK; - else if (check_soft) - rec.context = SCAN_BY_SYSTEM; - else - rec.context = SCAN_BY_LIMIT; - - rec.root = root_mem; - while (1) { victim = mem_cgroup_select_victim(root_mem); if (victim == root_mem) { @@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, css_put(&victim->css); continue; } - rec.mem = victim; - rec.nr_scanned[0] = 0; - rec.nr_scanned[1] = 0; - rec.nr_rotated[0] = 0; - rec.nr_rotated[1] = 0; - rec.nr_freed[0] = 0; - rec.nr_freed[1] = 0; - rec.elapsed = 0; /* we use swappiness of local cgroup */ if (check_soft) { ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, - noswap, zone, &rec, &scanned); - *total_scanned += scanned; + noswap, zone, &nr_scanned); + *total_scanned += nr_scanned; } else ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, - noswap, &rec); - mem_cgroup_record_scanstat(&rec); + noswap); css_put(&victim->css); /* * At shrinking usage, we can't check we should stop here or @@ -3854,18 +3752,14 @@ try_to_free: /* try to free all pages in this cgroup */ shrink = 1; while (nr_retries && mem->res.usage > 0) { - struct memcg_scanrecord rec; int progress; if (signal_pending(current)) { ret = -EINTR; goto out; } - rec.context = SCAN_BY_SHRINK; 
- rec.mem = mem; - rec.root = mem; progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, - false, &rec); + false); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) } #endif /* CONFIG_NUMA */ -static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, - struct cftype *cft, - struct cgroup_map_cb *cb) -{ - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); - char string[64]; - int i; - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_LIMIT); - cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); - } - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_SYSTEM); - cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); - } - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_LIMIT); - strcat(string, SCANSTAT_WORD_HIERARCHY); - cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); - } - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_SYSTEM); - strcat(string, SCANSTAT_WORD_HIERARCHY); - cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); - } - return 0; -} - -static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, - unsigned int event) -{ - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); - - spin_lock(&mem->scanstat.lock); - memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); - memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); - spin_unlock(&mem->scanstat.lock); - return 0; -} - - static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = { .mode = S_IRUGO, }, #endif - { - .name = "vmscan_stat", - .read_map = mem_cgroup_vmscan_stat_read, - .trigger = 
mem_cgroup_reset_vmscan_stat, - }, }; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP @@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) atomic_set(&mem->refcnt, 1); mem->move_charge_at_immigrate = 0; mutex_init(&mem->thresholds_lock); - spin_lock_init(&mem->scanstat.lock); return &mem->css; free_out: __mem_cgroup_free(mem); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8b57173c1dd..9c51f9f58ca 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -636,7 +636,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; - pgoff_t pgoff; unsigned long vmstart; unsigned long vmend; @@ -649,9 +648,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, - vma->anon_vma, vma->vm_file, pgoff, new_pol); + vma->anon_vma, vma->vm_file, vma->vm_pgoff, + new_pol); if (prev) { vma = prev; next = vma->vm_next; @@ -1412,7 +1411,9 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy, err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags); if (!err && nmask) { - err = copy_from_user(bm, nm, alloc_size); + unsigned long copy_size; + copy_size = min_t(unsigned long, sizeof(bm), alloc_size); + err = copy_from_user(bm, nm, copy_size); /* ensure entire bitmap is zeroed */ err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8); err |= compat_put_bitmap(nmask, bm, nr_bits); diff --git a/mm/migrate.c b/mm/migrate.c index 666e4e67741..14d0a6a632f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -120,10 +120,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, ptep = pte_offset_map(pmd, addr); - if (!is_swap_pte(*ptep)) { - pte_unmap(ptep); - goto out; - } + /* + * Peek to check is_swap_pte() before taking ptlock? 
No, we + * can race mremap's move_ptes(), which skips anon_vma lock. + */ ptl = pte_lockptr(mm, pmd); } diff --git a/mm/slub.c b/mm/slub.c index 9f662d70eb4..7c54fe83a90 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2377,7 +2377,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, */ if (unlikely(!prior)) { remove_full(s, page); - add_partial(n, page, 0); + add_partial(n, page, 1); stat(s, FREE_ADD_PARTIAL); } } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ef0903058e..5016f19e166 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2140,6 +2140,14 @@ struct vm_struct *alloc_vm_area(size_t size) return NULL; } + /* + * If the allocated address space is passed to a hypercall + * before being used then we cannot rely on a page fault to + * trigger an update of the page tables. So sync all the page + * tables here. + */ + vmalloc_sync_all(); + return area; } EXPORT_SYMBOL_GPL(alloc_vm_area); diff --git a/mm/vmscan.c b/mm/vmscan.c index b7719ec10dc..b55699cd906 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -105,7 +105,6 @@ struct scan_control { /* Which cgroup do we reclaim from */ struct mem_cgroup *mem_cgroup; - struct memcg_scanrecord *memcg_record; /* * Nodemask of nodes allowed by the caller. 
If NULL, all nodes @@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); reclaim_stat->recent_rotated[file] += numpages; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_rotated[file] += numpages; } if (!pagevec_add(&pvec, page)) { spin_unlock_irq(&zone->lru_lock); @@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, reclaim_stat->recent_scanned[0] += *nr_anon; reclaim_stat->recent_scanned[1] += *nr_file; - if (!scanning_global_lru(sc)) { - sc->memcg_record->nr_scanned[0] += *nr_anon; - sc->memcg_record->nr_scanned[1] += *nr_file; - } } /* @@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, nr_reclaimed += shrink_page_list(&page_list, zone, sc); } - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_freed[file] += nr_reclaimed; - local_irq_disable(); if (current_is_kswapd()) __count_vm_events(KSWAPD_STEAL, nr_reclaimed); @@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, } reclaim_stat->recent_scanned[file] += nr_taken; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, pgscanned); if (file) @@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * get_scan_ratio. 
*/ reclaim_stat->recent_rotated[file] += nr_rotated; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_rotated[file] += nr_rotated; move_active_pages_to_lru(zone, &l_active, LRU_ACTIVE + file * LRU_FILE); @@ -1808,23 +1794,15 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, u64 fraction[2], denominator; enum lru_list l; int noswap = 0; - int force_scan = 0; + bool force_scan = false; unsigned long nr_force_scan[2]; - - anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); - - if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) { - /* kswapd does zone balancing and need to scan this zone */ - if (scanning_global_lru(sc) && current_is_kswapd()) - force_scan = 1; - /* memcg may have small limit and need to avoid priority drop */ - if (!scanning_global_lru(sc)) - force_scan = 1; - } + /* kswapd does zone balancing and needs to scan this zone */ + if (scanning_global_lru(sc) && current_is_kswapd()) + force_scan = true; + /* memcg may have small limit and need to avoid priority drop */ + if (!scanning_global_lru(sc)) + force_scan = true; /* If we have no swap space, do not bother scanning anon pages. 
*/ if (!sc->may_swap || (nr_swap_pages <= 0)) { @@ -1837,6 +1815,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, goto out; } + anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + if (scanning_global_lru(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, @@ -2268,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct zone *zone, - struct memcg_scanrecord *rec, - unsigned long *scanned) + gfp_t gfp_mask, bool noswap, + struct zone *zone, + unsigned long *nr_scanned) { struct scan_control sc = { .nr_scanned = 0, @@ -2281,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, .may_swap = !noswap, .order = 0, .mem_cgroup = mem, - .memcg_record = rec, }; - ktime_t start, end; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -2292,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, sc.may_writepage, sc.gfp_mask); - start = ktime_get(); /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. @@ -2301,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, * the priority and make it zero. 
*/ shrink_zone(0, zone, &sc); - end = ktime_get(); - - if (rec) - rec->elapsed += ktime_to_ns(ktime_sub(end, start)); - *scanned = sc.nr_scanned; trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); + *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; } unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask, - bool noswap, - struct memcg_scanrecord *rec) + bool noswap) { struct zonelist *zonelist; unsigned long nr_reclaimed; - ktime_t start, end; int nid; struct scan_control sc = { .may_writepage = !laptop_mode, @@ -2328,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, .mem_cgroup = mem_cont, - .memcg_record = rec, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), @@ -2337,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .gfp_mask = sc.gfp_mask, }; - start = ktime_get(); /* * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't * take care of from where we get pages. 
So the node where we start the @@ -2352,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, sc.gfp_mask); nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); - end = ktime_get(); - if (rec) - rec->elapsed += ktime_to_ns(ktime_sub(end, start)); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); diff --git a/mm/vmstat.c b/mm/vmstat.c index 20c18b7694b..d52b13d28e8 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -659,7 +659,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, } #endif -#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -788,7 +788,7 @@ const char * const vmstat_text[] = { #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; -#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */ +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ #ifdef CONFIG_PROC_FS diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3e2f91ffa4e..05dd35114a2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -565,7 +565,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct orig_node *orig_node = NULL; int data_len = skb->len, ret; short vid = -1; - bool do_bcast = false; + bool do_bcast; if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto dropped; @@ -598,15 +598,15 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) tt_local_add(soft_iface, ethhdr->h_source); orig_node = transtable_search(bat_priv, ethhdr->h_dest); - if (is_multicast_ether_addr(ethhdr->h_dest) || - (orig_node && orig_node->gw_flags)) { + do_bcast = is_multicast_ether_addr(ethhdr->h_dest); + if (do_bcast || (orig_node && orig_node->gw_flags)) { ret = gw_is_target(bat_priv, skb, orig_node); if (ret < 0) goto dropped; - if (ret == 0) - do_bcast = true; + if (ret) + do_bcast = false; } /* 
ethernet packet should be broadcasted */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a40170e022e..7ef4eb4435f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -58,8 +58,8 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); @@ -76,8 +76,8 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_conn_check_pending(hdev); @@ -959,9 +959,8 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - if (test_bit(HCI_MGMT, &hdev->flags) && - !test_and_set_bit(HCI_INQUIRY, - &hdev->flags)) + if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 1); } @@ -1340,8 +1339,8 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, status); - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_req_complete(hdev, HCI_OP_INQUIRY, status); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 61f1f623091..e8292369cdc 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -26,6 +26,8 @@ /* Bluetooth L2CAP sockets. 
*/ +#include <linux/security.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> @@ -933,6 +935,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->force_reliable = pchan->force_reliable; chan->flushable = pchan->flushable; chan->force_active = pchan->force_active; + + security_sk_clone(parent, sk); } else { switch (sk->sk_type) { diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 482722bbc7a..5417f612732 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -42,6 +42,7 @@ #include <linux/device.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/security.h> #include <net/sock.h> #include <asm/system.h> @@ -264,6 +265,8 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent) pi->sec_level = rfcomm_pi(parent)->sec_level; pi->role_switch = rfcomm_pi(parent)->role_switch; + + security_sk_clone(parent, sk); } else { pi->dlc->defer_setup = 0; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8270f05e3f1..a324b009e34 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -41,6 +41,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/list.h> +#include <linux/security.h> #include <net/sock.h> #include <asm/system.h> @@ -403,8 +404,10 @@ static void sco_sock_init(struct sock *sk, struct sock *parent) { BT_DBG("sk %p", sk); - if (parent) + if (parent) { sk->sk_type = parent->sk_type; + security_sk_clone(parent, sk); + } } static struct proto sco_proto = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32b8f9f7f79..ff3ed6086ce 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); netdev_update_features(dev); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -108,8 +107,6 @@ static int 
br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); - br_stp_disable_bridge(br); br_multicast_stop(br); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e73815456ad..1d420f64ff2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -161,9 +161,10 @@ static void del_nbp(struct net_bridge_port *p) call_rcu(&p->rcu, destroy_nbp_rcu); } -/* called with RTNL */ -static void del_br(struct net_bridge *br, struct list_head *head) +/* Delete bridge device */ +void br_dev_delete(struct net_device *dev, struct list_head *head) { + struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { @@ -268,7 +269,7 @@ int br_del_bridge(struct net *net, const char *name) } else - del_br(netdev_priv(dev), NULL); + br_dev_delete(dev, NULL); rtnl_unlock(); return ret; @@ -449,7 +450,7 @@ void __net_exit br_net_exit(struct net *net) rtnl_lock(); for_each_netdev(net, dev) if (dev->priv_flags & IFF_EBRIDGE) - del_br(netdev_priv(dev), &list); + br_dev_delete(dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5b1ed1ba9aa..e5f9ece3c9a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -210,6 +210,7 @@ static struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .dellink = br_dev_delete, }; int __init br_netlink_init(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 78cc364997d..857a021deea 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -294,6 +294,7 @@ static inline int br_is_root_bridge(const struct net_bridge *br) /* br_device.c */ extern void br_dev_setup(struct net_device *dev); +extern void br_dev_delete(struct net_device *dev, struct list_head *list); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device 
*dev); #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73eb06c..a9aff9c7d02 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7c2fa0a0814..7f9ac0742d1 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) caifdevs = caif_device_list(dev_net(dev)); BUG_ON(!caifdevs); - caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); + caifd = kzalloc(sizeof(*caifd), GFP_KERNEL); if (!caifd) return NULL; caifd->pcpu_refcnt = alloc_percpu(int); + if (!caifd->pcpu_refcnt) { + kfree(caifd); + return NULL; + } caifd->netdev = dev; dev_hold(dev); return caifd; diff --git a/net/can/af_can.c b/net/can/af_can.c index 8ce926d3b2c..9b0c32a2690 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -857,7 +857,7 @@ static __exit void can_exit(void) struct net_device *dev; if (stats_timer) - del_timer(&can_stattimer); + del_timer_sync(&can_stattimer); can_remove_proc(); diff --git a/net/can/bcm.c b/net/can/bcm.c index d6c8ae5b2e6..c84963d2dee 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_start_timer(struct bcm_op *op) +{ + if (op->kt_ival1.tv64 && op->count) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); + else if (op->kt_ival2.tv64) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); +} + static void bcm_tx_timeout_tsklet(unsigned long data) { struct bcm_op *op = (struct bcm_op *)data; @@ 
-365,26 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - } - - if (op->kt_ival1.tv64 && (op->count > 0)) { - - /* send (next) frame */ bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); - } else { - if (op->kt_ival2.tv64) { + } else if (op->kt_ival2.tv64) + bcm_can_tx(op); - /* send (next) frame */ - bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); - } - } + bcm_tx_start_timer(op); } /* @@ -964,23 +962,20 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_cancel(&op->timer); } - if ((op->flags & STARTTIMER) && - ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { - + if (op->flags & STARTTIMER) { + hrtimer_cancel(&op->timer); /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - - if (op->kt_ival1.tv64 && (op->count > 0)) { - /* op->count-- is done in bcm_tx_timeout_handler */ - hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); - } else - hrtimer_start(&op->timer, op->kt_ival2, - HRTIMER_MODE_REL); } - if (op->flags & TX_ANNOUNCE) + if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); + if (op->count) + op->count--; + } + + if (op->flags & STARTTIMER) + bcm_tx_start_timer(op); return msg_head->nframes * CFSIZ + MHSIZ; } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 132963abc26..2883ea01e68 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt) ceph_crypto_key_destroy(opt->key); kfree(opt->key); } + kfree(opt->mon_addr); kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index c340e2e0765..9918e9eb276 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->front_max = 
front_len; m->front_is_vmalloc = false; m->more_to_follow = false; + m->ack_stamp = 0; m->pool = NULL; /* middle */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 16836a7df7a..88ad8a2501b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_req_lru_item); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -816,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc, { req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); - INIT_LIST_HEAD(&req->r_req_lru_item); - dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; - if (osdc->num_requests == 1) { dout(" first request, scheduling timeout\n"); __schedule_osd_timeout(osdc); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e97c3588c3e..fd863fe7693 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, struct ceph_pg_mapping *pg = NULL; int c; + dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new); while (*p) { parent = *p; pg = rb_entry(parent, struct ceph_pg_mapping, node); @@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, while (n) { pg = rb_entry(n, struct ceph_pg_mapping, node); c = pgid_cmp(pgid, pg->pgid); - if (c < 0) + if (c < 0) { n = n->rb_left; - else if (c > 0) + } else if (c > 0) { n = n->rb_right; - else + } else { + dout("__lookup_pg_mapping %llx got %p\n", + *(u64 *)&pgid, pg); return pg; + } } return NULL; } +static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) +{ + struct ceph_pg_mapping *pg = 
__lookup_pg_mapping(root, pgid); + + if (pg) { + dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + rb_erase(&pg->node, root); + kfree(pg); + return 0; + } + dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + return -ENOENT; +} + /* * rbtree of pg pool info */ @@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, void *start = *p; int err = -EINVAL; u16 version; - struct rb_node *rbp; ceph_decode_16_safe(p, end, version, bad); if (version > CEPH_OSDMAP_INC_VERSION) { @@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_pg_temp */ - rbp = rb_first(&map->pg_temp); ceph_decode_32_safe(p, end, len, bad); while (len--) { struct ceph_pg_mapping *pg; @@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ceph_decode_copy(p, &pgid, sizeof(pgid)); pglen = ceph_decode_32(p); - /* remove any? */ - while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, - node)->pgid, pgid) <= 0) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } - if (pglen) { /* insert */ ceph_decode_need(p, end, pglen*sizeof(u32), bad); @@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, pglen); + } else { + /* remove */ + __remove_pg_mapping(&map->pg_temp, pgid); } } - while (rbp) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } /* ignore the rest */ *p = end; @@ -1046,10 +1044,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned poolid, ps, pps; + unsigned 
poolid, ps, pps, t; int preferred; + poolid = le32_to_cpu(pgid.pool); + ps = le16_to_cpu(pgid.ps); + preferred = (s16)le16_to_cpu(pgid.preferred); + + pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + if (!pool) + return NULL; + /* pg_temp? */ + if (preferred >= 0) + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num), + pool->lpgp_num_mask); + else + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), + pool->pgp_num_mask); + pgid.ps = cpu_to_le16(t); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1057,18 +1070,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); - preferred = (s16)le16_to_cpu(pgid.preferred); - - /* don't forcefeed bad device ids to crush */ - if (preferred >= osdmap->max_osd || - preferred >= osdmap->crush->max_devices) - preferred = -1; - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); - if (!pool) - return NULL; ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, pool->v.type, pool->v.size); if (ruleno < 0) { @@ -1078,6 +1079,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, return NULL; } + /* don't forcefeed bad device ids to crush */ + if (preferred >= osdmap->max_osd || + preferred >= osdmap->crush->max_devices) + preferred = -1; + if (preferred >= 0) pps = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpgp_num), diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579be..b10ff0a7185 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 
e7ab0c0285b..27071ee2a4e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; @@ -475,8 +475,11 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_del_rcu(&rule->list); - if (rule->action == FR_ACT_GOTO) + if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } /* * Check if this rule is a target to any of them. If so, diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33cad3..555a456efb0 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. 
*/ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. 
--RR */ if (!fcp->hash_table) @@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27002dffe7e..387703f56fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. 
+ */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 27997d35ebd..a2468363978 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - dev->priv_flags = IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b745d412cf..dd2b9478ddd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; if (addr->sin_family != AF_INET) { + /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) + * only if 
s_addr is INADDR_ANY. + */ err = -EAFNOSUPPORT; - goto out; + if (addr->sin_family != AF_UNSPEC || + addr->sin_addr.s_addr != htonl(INADDR_ANY)) + goto out; } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35b74b..80106d89d54 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d96391..e59aabd0eae 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; @@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + 
break; default: status = -EINVAL; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b14ec7d03b6..4bfad5da94f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), + SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), + SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea0d2183df4..d73aab3fbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1124,7 +1124,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, return 0; /* ...Then it's D-SACK, and must reside below snd_una completely */ - if (!after(end_seq, tp->snd_una)) + if (after(end_seq, tp->snd_una)) return 0; if (!before(start_seq, tp->undo_marker)) @@ -1389,9 +1389,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Tweak before seqno plays */ - if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && - !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c12b8ec849..7963e03f106 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static void syn_flood_warning(const struct sk_buff *skb) +/* + * Return 1 if a syncookie should be sent + */ +int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) { - const char *msg; + const char *msg = "Dropping request"; + int want_cookie = 0; + struct listen_sock *lopt; + + #ifdef 
CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) + if (sysctl_tcp_syncookies) { msg = "Sending cookies"; - else + want_cookie = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else #endif - msg = "Dropping request"; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - pr_info("TCP: Possible SYN flooding on port %d. %s.\n", - ntohs(tcp_hdr(skb)->dest), msg); + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. " + " Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; } +EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. @@ -909,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + + md5sig = tp->md5sig_info; + if (md5sig->entries4 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } - md5sig = tp->md5sig_info; if (md5sig->alloced4 == md5sig->entries4) { keys = kmalloc((sizeof(*keys) * (md5sig->entries4 + 1)), GFP_ATOMIC); if (!keys) { kfree(newkey); - tcp_free_md5sig_pool(); + if (md5sig->entries4 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -964,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; tp->md5sig_info->alloced4 = 0; + tcp_free_md5sig_pool(); } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memmove(&tp->md5sig_info->keys4[i], @@ -971,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) (tp->md5sig_info->entries4 - i) * sizeof(struct tcp4_md5sig_key)); } - tcp_free_md5sig_pool(); return 0; } } @@ -1235,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = 
TCP_SKB_CB(skb)->when; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1250,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); + if (!want_cookie) + goto drop; } /* Accept backlog is full. If we have already queued enough @@ -1303,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d2fe4e06b47..0ce3d06dce6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -328,6 +328,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + tw->tw_transparent = inet_sk(sk)->transparent; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f012ebd87b4..12368c58606 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -374,8 +374,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s(): cannot allocate memory for statistics; dev=%s.\n", __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - 
in6_dev_finish_destroy(ndev); + dev_put(dev); + kfree(ndev); return NULL; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3b5669a2582..d27c797f9f0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -875,6 +875,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_reset_transport_header(skb); __skb_push(skb, skb_gro_offset(skb)); + ops = rcu_dereference(inet6_protos[proto]); if (!ops || !ops->gro_receive) goto out_unlock; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ef1831746e..b46e9f88ce3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -599,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct net *net, +int datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) @@ -658,7 +658,8 @@ int datagram_send_ctl(struct net *net, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + if (!inet_sk(sk)->transparent && + !ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? 
dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f3caf1b8d57..54303945019 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo } static struct ip6_flowlabel * -fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, - int optlen, int *err_p) +fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, + char __user *optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; @@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, + err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; @@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(net, &freq, optval, optlen, &err); + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 705c8288628..def0538e241 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -2052,8 +2054,10 @@ int ip6_mr_input(struct sk_buff *skb) int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 147ede38ab4..2fbda5fc4cc 100644 --- 
a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, + retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 24939486328..e63c3972a73 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; @@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6a79f3081bd..343852e5c70 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e69eb0ec6d..fb545edef6e 100644 --- 
a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; + if (!(rt->dst.flags & DST_HOST)) + return NULL; + if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); @@ -241,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, { struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); - memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + if (rt != NULL) + memset(&rt->rt6i_table, 0, + sizeof(*rt) - sizeof(struct dst_entry)); return rt; } @@ -252,6 +257,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct inet_peer *peer = rt->rt6i_peer; + if (!(rt->dst.flags & DST_HOST)) + dst_destroy_metrics_generic(dst); + if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -723,9 +731,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, ipv6_addr_copy(&rt->rt6i_gateway, daddr); } - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -775,9 +781,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct rt6_info *rt = ip6_rt_copy(ort, daddr); if (rt) { - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; @@ -1078,12 +1082,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, neigh = NULL; } - rt->rt6i_idev = idev; + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - rt->dst.output = ip6_output; + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; @@ 
-1261,6 +1268,14 @@ int ip6_route_add(struct fib6_config *cfg) if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; + if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { + u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!metrics) { + err = -ENOMEM; + goto out; + } + dst_init_metrics(&rt->dst, metrics, 0); + } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1607,9 +1622,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_dst.plen = 128; - nrt->dst.flags |= DST_HOST; - ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); @@ -1754,9 +1766,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt) { rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; + rt->dst.flags |= DST_HOST; ipv6_addr_copy(&rt->rt6i_dst.addr, dest); - rt->rt6i_dst.plen = ort->rt6i_dst.plen; + rt->rt6i_dst.plen = 128; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fb63f4aeb..7b8fc579435 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, return tcp_v6_send_synack(sk, req, rvp); } -static inline void syn_flood_warning(struct sk_buff *skb) -{ -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); - else -#endif - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. 
" - "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); -} - static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree_skb(inet6_rsk(req)->pktopts); @@ -605,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + if (tp->md5sig_info->entries6 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -614,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); if (!keys) { - tcp_free_md5sig_pool(); kfree(newkey); + if (tp->md5sig_info->entries6 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -661,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) kfree(tp->md5sig_info->keys6); tp->md5sig_info->keys6 = NULL; tp->md5sig_info->alloced6 = 0; + tcp_free_md5sig_pool(); } else { /* shrink the database */ if (tp->md5sig_info->entries6 != i) @@ -669,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) (tp->md5sig_info->entries6 - i) * sizeof (tp->md5sig_info->keys6[0])); } - tcp_free_md5sig_pool(); return 0; } } @@ -1179,11 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 -#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1192,14 +1176,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - want_cookie = 1; - else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); + if (!want_cookie) + goto drop; } if (sk_acceptq_is_full(sk) && 
inet_csk_reqsk_queue_young(sk) > 1) @@ -1249,9 +1228,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { @@ -1408,6 +1385,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1472,6 +1451,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b51c49..bb95e8e1c6f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1090,8 +1090,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index d0b70dadf73..2615ffc8e78 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -40,9 +40,9 @@ extern int sysctl_slot_timeout; extern int sysctl_fast_poll_increase; extern char sysctl_devname[]; extern int sysctl_max_baud_rate; -extern int sysctl_min_tx_turn_time; -extern int sysctl_max_tx_data_size; -extern int sysctl_max_tx_window; +extern unsigned int sysctl_min_tx_turn_time; +extern unsigned int sysctl_max_tx_data_size; +extern unsigned int sysctl_max_tx_window; extern int sysctl_max_noreply_time; extern int sysctl_warn_noreply_time; 
extern int sysctl_lap_keepalive_time; diff --git a/net/irda/qos.c b/net/irda/qos.c index 1b51bcf4239..4369f7f41bc 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12; * Default is 10us which means using the unmodified value given by the * peer except if it's 0 (0 is likely a bug in the other stack). */ -unsigned sysctl_min_tx_turn_time = 10; +unsigned int sysctl_min_tx_turn_time = 10; /* * Maximum data size to be used in transmission in payload of LAP frame. * There is a bit of confusion in the IrDA spec : @@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10; * bytes frames or all negotiated frame sizes, but you can use the sysctl * to play with this value anyway. * Jean II */ -unsigned sysctl_max_tx_data_size = 2042; +unsigned int sysctl_max_tx_data_size = 2042; /* * Maximum transmit window, i.e. number of LAP frames between turn-around. * This allow to override what the peer told us. Some peers are buggy and * don't always support what they tell us. 
* Jean II */ -unsigned sysctl_max_tx_window = 7; +unsigned int sysctl_max_tx_window = 7; static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ad4ac2601a5..34b2ddeacb6 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1045,8 +1045,10 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + hdr_len; old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, headroom)) + if (skb_cow_head(skb, headroom)) { + dev_kfree_skb(skb); goto abort; + } new_headroom = skb_headroom(skb); skb_orphan(skb); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3db78b696c5..21070e9bc8d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -665,7 +665,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) BUG_ON(!sdata->bss); atomic_dec(&sdata->bss->num_sta_ps); - __sta_info_clear_tim_bit(sdata->bss, sta); + sta_info_clear_tim_bit(sta); } local->num_sta--; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2b771dc708a..e3be48bf4dc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2283,6 +2283,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ip_vs_service *svc; struct ip_vs_dest_user *udest_compat; struct ip_vs_dest_user_kern udest; + struct netns_ipvs *ipvs = net_ipvs(net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2303,6 +2304,24 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* increase the module use count */ ip_vs_use_count_inc(); + /* Handle daemons since they have another lock */ + if (cmd == IP_VS_SO_SET_STARTDAEMON || + cmd == IP_VS_SO_SET_STOPDAEMON) { + struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; + + if 
(mutex_lock_interruptible(&ipvs->sync_mutex)) { + ret = -ERESTARTSYS; + goto out_dec; + } + if (cmd == IP_VS_SO_SET_STARTDAEMON) + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); + else + ret = stop_sync_thread(net, dm->state); + mutex_unlock(&ipvs->sync_mutex); + goto out_dec; + } + if (mutex_lock_interruptible(&__ip_vs_mutex)) { ret = -ERESTARTSYS; goto out_dec; @@ -2316,15 +2335,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Set timeout values for (tcp tcpfin udp) */ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(net, dm->state, dm->mcast_ifn, - dm->syncid); - goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(net, dm->state); - goto out_unlock; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -2584,6 +2594,33 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(arg, user, copylen) != 0) return -EFAULT; + /* + * Handle daemons first since it has its own locking + */ + if (cmd == IP_VS_SO_GET_DAEMON) { + struct ip_vs_daemon_user d[2]; + + memset(&d, 0, sizeof(d)); + if (mutex_lock_interruptible(&ipvs->sync_mutex)) + return -ERESTARTSYS; + + if (ipvs->sync_state & IP_VS_STATE_MASTER) { + d[0].state = IP_VS_STATE_MASTER; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; + } + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { + d[1].state = IP_VS_STATE_BACKUP; + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; + } + if (copy_to_user(user, &d, sizeof(d)) != 0) + ret = -EFAULT; + mutex_unlock(&ipvs->sync_mutex); + return ret; + } if 
(mutex_lock_interruptible(&__ip_vs_mutex)) return -ERESTARTSYS; @@ -2681,28 +2718,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } break; - case IP_VS_SO_GET_DAEMON: - { - struct ip_vs_daemon_user d[2]; - - memset(&d, 0, sizeof(d)); - if (ipvs->sync_state & IP_VS_STATE_MASTER) { - d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, - sizeof(d[0].mcast_ifn)); - d[0].syncid = ipvs->master_syncid; - } - if (ipvs->sync_state & IP_VS_STATE_BACKUP) { - d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, - sizeof(d[1].mcast_ifn)); - d[1].syncid = ipvs->backup_syncid; - } - if (copy_to_user(user, &d, sizeof(d)) != 0) - ret = -EFAULT; - } - break; - default: ret = -EINVAL; } @@ -3205,7 +3220,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->sync_mutex); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, ipvs->master_mcast_ifn, @@ -3225,7 +3240,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->sync_mutex); return skb->len; } @@ -3271,13 +3286,9 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) return ip_vs_set_timeout(net, &t); } -static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { - struct ip_vs_service *svc = NULL; - struct ip_vs_service_user_kern usvc; - struct ip_vs_dest_user_kern udest; int ret = 0, cmd; - int need_full_svc = 0, need_full_dest = 0; struct net *net; struct netns_ipvs *ipvs; @@ -3285,19 +3296,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); - - if 
(cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); - goto out; - } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); - goto out; - } else if (cmd == IPVS_CMD_NEW_DAEMON || - cmd == IPVS_CMD_DEL_DAEMON) { - + if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + mutex_lock(&ipvs->sync_mutex); if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], @@ -3310,6 +3312,33 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); +out: + mutex_unlock(&ipvs->sync_mutex); + } + return ret; +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + struct net *net; + struct netns_ipvs *ipvs; + + net = skb_sknet(skb); + ipvs = net_ipvs(net); + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(net); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { @@ -3536,13 +3565,13 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = { .cmd = IPVS_CMD_NEW_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_GET_DAEMON, @@ -3679,7 +3708,7 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - 
ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + rwlock_init(&ipvs->rs_lock); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7ee7215b8ba..3cdd479f9b5 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -61,6 +61,7 @@ #define SYNC_PROTO_VER 1 /* Protocol version in header */ +static struct lock_class_key __ipvs_sync_key; /* * IPVS sync connection entry * Version 0, i.e. original version. @@ -1545,6 +1546,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + if (state == IP_VS_STATE_MASTER) { if (ipvs->master_thread) return -EEXIST; @@ -1667,6 +1669,7 @@ int __net_init ip_vs_sync_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); INIT_LIST_HEAD(&ipvs->sync_queue); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); @@ -1680,7 +1683,9 @@ int __net_init ip_vs_sync_net_init(struct net *net) void ip_vs_sync_net_cleanup(struct net *net) { int retc; + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&ipvs->sync_mutex); retc = stop_sync_thread(net, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); @@ -1688,4 +1693,5 @@ void ip_vs_sync_net_cleanup(struct net *net) retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); + mutex_unlock(&ipvs->sync_mutex); } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 2fd4565144d..31d56b23b9e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb, break; case PPTP_WAN_ERROR_NOTIFY: + case PPTP_SET_LINK_INFO: case 
PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index cf616e55ca4..d69facdd9a7 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,8 +241,8 @@ static int gre_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); + if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37bf94394be..8235b86b4e8 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) @@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, BUG_ON(ptr == NULL); /* Fast path for timestamp-only option */ - if (length == TCPOLEN_TSTAMP_ALIGNED*4 + if (length == TCPOLEN_TSTAMP_ALIGNED && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) @@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK && opsize >= (TCPOLEN_SACK_BASE diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00bd475eab4..a80b0cb03f1 100644 
--- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[]) return NULL; vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) return NULL; return vhdr; } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a083184d8..ed0db15ab00 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c698cec0a44..fabb4fafa28 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -961,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); + spin_lock(&sk->sk_receive_queue.lock); + po->stats.tp_drops++; + atomic_inc(&sk->sk_drops); + spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c 
index 8b77edbab27..4e1de171866 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list); + struct list_head *kill_list, + int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) @@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) * will be destroyed by the unmap function. 
*/ if (!list_empty(&unmap_list)) { - ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, + &kill_list, &unpinned); /* If we've been asked to destroy all MRs, move those * that were simply cleaned to the kill list */ if (free_all) @@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) spin_unlock_irqrestore(&pool->list_lock, flags); } + atomic_sub(unpinned, &pool->free_pinned); atomic_sub(ncleaned, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); @@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list) + struct list_head *kill_list, + int *unpinned) { struct rds_iw_mapping *mapping, *next; unsigned int ncleaned = 0; @@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, spin_lock_irqsave(&pool->list_lock, flags); list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + *unpinned += mapping->m_sg.len; list_move(&mapping->m_list, &laundered); ncleaned++; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index be4505ee67a..b01427924f8 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct rsvp_filter *f, **fp; struct rsvp_session *s, **sp; struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS-1]; + struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_RSVP_MAX + 1]; struct tcf_exts e; unsigned int h1, h2; @@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; @@ -449,8 +449,8 @@ static int 
rsvp_change(struct tcf_proto *tp, unsigned long base, if (f->handle != handle && handle) goto errout2; - if (tb[TCA_RSVP_CLASSID-1]) { - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); tcf_bind_filter(tp, &f->res, base); } @@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (handle) goto errout2; - if (tb[TCA_RSVP_DST-1] == NULL) + if (tb[TCA_RSVP_DST] == NULL) goto errout2; err = -ENOBUFS; @@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout2; h2 = 16; - if (tb[TCA_RSVP_SRC-1]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); + if (tb[TCA_RSVP_SRC]) { + memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); h2 = hash_src(f->src); } - if (tb[TCA_RSVP_PINFO-1]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO-1]); + if (tb[TCA_RSVP_PINFO]) { + pinfo = nla_data(tb[TCA_RSVP_PINFO]); f->spi = pinfo->spi; f->tunnelhdr = pinfo->tunnelhdr; } - if (tb[TCA_RSVP_CLASSID-1]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - dst = nla_data(tb[TCA_RSVP_DST-1]); + dst = nla_data(tb[TCA_RSVP_DST]); h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? 
pinfo->tunnelid : 0); err = -ENOMEM; @@ -642,8 +642,7 @@ nla_put_failure: return -1; } -static struct tcf_proto_ops RSVP_OPS = { - .next = NULL, +static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, .init = rsvp_init, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 167c880cf8d..76388b083f2 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; + + case SCTP_CMD_SET_ASOC: + asoc = cmd->obj.asoc; + break; + default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 49b847b00f9..a0f31e6c1c6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); + /* Restore association pointer to provide SCTP command interpeter + * with a valid context in case it needs to manipulate + * the queues */ + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, + SCTP_ASOC((struct sctp_association *)asoc)); + return retval; nomem: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e83e7fee3bc..ea40d540a99 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4113,9 +4113,12 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, if (len % sizeof(u32)) return -EINVAL; + if (settings->n_akm_suites > NL80211_MAX_NR_AKM_SUITES) + return -EINVAL; + memcpy(settings->akm_suites, data, len); - for (i = 0; i < settings->n_ciphers_pairwise; i++) + for (i = 0; i < settings->n_akm_suites; i++) if (!nl80211_valid_akm_suite(settings->akm_suites[i])) return -EINVAL; } diff --git a/net/wireless/reg.c 
b/net/wireless/reg.c index 02751dbc5a9..68a471ba193 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -852,6 +852,7 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index b7b6ff8be55..dec0fa28372 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -118,6 +118,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) i++, j++) request->channels[i] = &wdev->wiphy->bands[band]->channels[j]; + request->rates[band] = + (1 << wdev->wiphy->bands[band]->n_bitrates) - 1; } } request->n_channels = n_channels; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d30615419b4..5f03e4ea65b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -91,7 +91,7 @@ int x25_parse_address_block(struct sk_buff *skb, int needed; int rc; - if (skb->len < 1) { + if (!pskb_may_pull(skb, 1)) { /* packet has no address block */ rc = 0; goto empty; @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, len = *skb->data; needed = 1 + (len >> 4) + (len & 0x0f); - if (skb->len < needed) { + if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims to hold */ rc = -1; @@ -295,7 +295,8 @@ static struct sock *x25_find_listener(struct x25_address *addr, * Found a listening socket, now check the incoming * call user data vs this sockets call user data */ - if(skb->len > 0 && x25_sk(s)->cudmatchlength > 0) { + if (x25_sk(s)->cudmatchlength > 0 && + skb->len >= x25_sk(s)->cudmatchlength) { if((memcmp(x25_sk(s)->calluserdata.cuddata, skb->data, x25_sk(s)->cudmatchlength)) == 0) { @@ -951,14 +952,27 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, * * Facilities length is mandatory in call request packets */ - if (skb->len < 1) + if (!pskb_may_pull(skb, 1)) goto 
out_clear_request; len = skb->data[0] + 1; - if (skb->len < len) + if (!pskb_may_pull(skb, len)) goto out_clear_request; skb_pull(skb,len); /* + * Ensure that the amount of call user data is valid. + */ + if (skb->len > X25_MAX_CUD_LEN) + goto out_clear_request; + + /* + * Get all the call user data so it can be used in + * x25_find_listener and skb_copy_from_linear_data up ahead. + */ + if (!pskb_may_pull(skb, skb->len)) + goto out_clear_request; + + /* * Find a listener for the particular address/cud pair. */ sk = x25_find_listener(&source_addr,skb); @@ -1166,6 +1180,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, * byte of the user data is the logical value of the Q Bit. */ if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { + if (!pskb_may_pull(skb, 1)) + goto out_kfree_skb; + qbit = skb->data[0]; skb_pull(skb, 1); } @@ -1244,7 +1261,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, struct x25_sock *x25 = x25_sk(sk); struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name; size_t copied; - int qbit; + int qbit, header_len = x25->neighbour->extended ? + X25_EXT_MIN_LEN : X25_STD_MIN_LEN; + struct sk_buff *skb; unsigned char *asmptr; int rc = -ENOTCONN; @@ -1265,6 +1284,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_dequeue(&x25->interrupt_in_queue); + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + goto out_free_dgram; + skb_pull(skb, X25_STD_MIN_LEN); /* @@ -1285,10 +1307,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto out; + if (!pskb_may_pull(skb, header_len)) + goto out_free_dgram; + qbit = (skb->data[0] & X25_Q_BIT) == X25_Q_BIT; - skb_pull(skb, x25->neighbour->extended ? 
- X25_EXT_MIN_LEN : X25_STD_MIN_LEN); + skb_pull(skb, header_len); if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index e547ca1578c..fa2b41888bd 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -32,6 +32,9 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) unsigned short frametype; unsigned int lci; + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + return 0; + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); @@ -115,6 +118,9 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, goto drop; } + if (!pskb_may_pull(skb, 1)) + return 0; + switch (skb->data[0]) { case X25_IFACE_DATA: diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index f77e4e75f91..36384a1fa9f 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -44,7 +44,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) { - unsigned char *p = skb->data; + unsigned char *p; unsigned int len; *vc_fac_mask = 0; @@ -60,14 +60,16 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae)); memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae)); - if (skb->len < 1) + if (!pskb_may_pull(skb, 1)) return 0; - len = *p++; + len = skb->data[0]; - if (len >= skb->len) + if (!pskb_may_pull(skb, 1 + len)) return -1; + p = skb->data + 1; + while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 0b073b51b18..a49cd4ec551 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -107,6 +107,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp /* * Parse the data in the frame. 
*/ + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + goto out_clear; skb_pull(skb, X25_STD_MIN_LEN); len = x25_parse_address_block(skb, &source_addr, @@ -127,9 +129,11 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp * Copy any Call User Data. */ if (skb->len > 0) { - skb_copy_from_linear_data(skb, - x25->calluserdata.cuddata, - skb->len); + if (skb->len > X25_MAX_CUD_LEN) + goto out_clear; + + skb_copy_bits(skb, 0, x25->calluserdata.cuddata, + skb->len); x25->calluserdata.cudlength = skb->len; } if (!sock_flag(sk, SOCK_DEAD)) @@ -137,6 +141,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; } case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); break; @@ -164,6 +171,9 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp switch (frametype) { case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -177,6 +187,11 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25_start_t23timer(sk); + return 0; } /* @@ -206,6 +221,9 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -304,6 +322,12 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return queued; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* @@ -313,13 +337,13 @@ 
static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp */ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype) { + struct x25_sock *x25 = x25_sk(sk); + switch (frametype) { case X25_RESET_REQUEST: x25_write_internal(sk, X25_RESET_CONFIRMATION); case X25_RESET_CONFIRMATION: { - struct x25_sock *x25 = x25_sk(sk); - x25_stop_timer(sk); x25->condition = 0x00; x25->va = 0; @@ -331,6 +355,9 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; } case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -340,6 +367,12 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* Higher level upcall for a LAPB frame */ diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 037958ff8ee..4acacf3c661 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -90,6 +90,9 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, break; case X25_DIAGNOSTIC: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) + break; + printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 24a342ebc7f..5170d52bfd9 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -269,7 +269,11 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m) { struct x25_sock *x25 = x25_sk(sk); - unsigned char *frame = skb->data; + unsigned char *frame; + + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; *ns = *nr = *q = *d = *m = 0; @@ -294,6 +298,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int 
*ns, int *nr, int *q, if (frame[2] == X25_RR || frame[2] == X25_RNR || frame[2] == X25_REJ) { + if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; + *nr = (frame[3] >> 1) & 0x7F; return frame[2]; } @@ -308,6 +316,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, if (x25->neighbour->extended) { if ((frame[2] & 0x01) == X25_DATA) { + if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; + *q = (frame[0] & X25_Q_BIT) == X25_Q_BIT; *d = (frame[0] & X25_D_BIT) == X25_D_BIT; *m = (frame[3] & X25_EXT_M_BIT) == X25_EXT_M_BIT; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a026b0ef244..54a0dc2e2f8 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,6 +212,11 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; + if (async && x->repl->check(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + x->repl->advance(x, seq); x->curlft.bytes += skb->len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94fdcc7f103..552df27dcf5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) BUG(); } xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); - memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry)); - xfrm_policy_put_afinfo(afinfo); - if (likely(xdst)) + if (likely(xdst)) { + memset(&xdst->u.rt6.rt6i_table, 0, + sizeof(*xdst) - sizeof(struct dst_entry)); xdst->flo.ops = &xfrm_bundle_fc_ops; - else + } else xdst = ERR_PTR(-ENOBUFS); + xfrm_policy_put_afinfo(afinfo); + return xdst; } diff --git a/security/security.c b/security/security.c index 0e4fccfef12..d9e15339092 100644 --- a/security/security.c +++ b/security/security.c @@ -1097,6 +1097,7 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk) { security_ops->sk_clone_security(sk, 
newsk); } +EXPORT_SYMBOL(security_sk_clone); void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 86d0caf91b3..62e90b862a0 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1761,6 +1761,10 @@ static int wait_for_avail(struct snd_pcm_substream *substream, snd_pcm_uframes_t avail = 0; long wait_time, tout; + init_waitqueue_entry(&wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&runtime->tsleep, &wait); + if (runtime->no_period_wakeup) wait_time = MAX_SCHEDULE_TIMEOUT; else { @@ -1771,16 +1775,32 @@ static int wait_for_avail(struct snd_pcm_substream *substream, } wait_time = msecs_to_jiffies(wait_time * 1000); } - init_waitqueue_entry(&wait, current); - add_wait_queue(&runtime->tsleep, &wait); + for (;;) { if (signal_pending(current)) { err = -ERESTARTSYS; break; } + + /* + * We need to check if space became available already + * (and thus the wakeup happened already) first to close + * the race of space already having become available. + * This check must happen after been added to the waitqueue + * and having current state be INTERRUPTIBLE. 
+ */ + if (is_playback) + avail = snd_pcm_playback_avail(runtime); + else + avail = snd_pcm_capture_avail(runtime); + if (avail >= runtime->twake) + break; snd_pcm_stream_unlock_irq(substream); - tout = schedule_timeout_interruptible(wait_time); + + tout = schedule_timeout(wait_time); + snd_pcm_stream_lock_irq(substream); + set_current_state(TASK_INTERRUPTIBLE); switch (runtime->status->state) { case SNDRV_PCM_STATE_SUSPENDED: err = -ESTRPIPE; @@ -1806,14 +1826,9 @@ static int wait_for_avail(struct snd_pcm_substream *substream, err = -EIO; break; } - if (is_playback) - avail = snd_pcm_playback_avail(runtime); - else - avail = snd_pcm_capture_avail(runtime); - if (avail >= runtime->twake) - break; } _endloop: + set_current_state(TASK_RUNNING); remove_wait_queue(&runtime->tsleep, &wait); *availp = avail; return err; diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index f9123f09e83..32b02d90670 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -68,6 +68,7 @@ MODULE_PARM_DESC(enable, "Enable FM801 soundcard."); module_param_array(tea575x_tuner, int, NULL, 0444); MODULE_PARM_DESC(tea575x_tuner, "TEA575x tuner access method (0 = auto, 1 = SF256-PCS, 2=SF256-PCP, 3=SF64-PCR, 8=disable, +16=tuner-only)."); +#define TUNER_DISABLED (1<<3) #define TUNER_ONLY (1<<4) #define TUNER_TYPE_MASK (~TUNER_ONLY & 0xFFFF) @@ -1150,7 +1151,8 @@ static int snd_fm801_free(struct fm801 *chip) __end_hw: #ifdef CONFIG_SND_FM801_TEA575X_BOOL - snd_tea575x_exit(&chip->tea); + if (!(chip->tea575x_tuner & TUNER_DISABLED)) + snd_tea575x_exit(&chip->tea); #endif if (chip->irq >= 0) free_irq(chip->irq, chip); @@ -1236,7 +1238,6 @@ static int __devinit snd_fm801_create(struct snd_card *card, (tea575x_tuner & TUNER_TYPE_MASK) < 4) { if (snd_tea575x_init(&chip->tea)) { snd_printk(KERN_ERR "TEA575x radio not found\n"); - snd_fm801_free(chip); return -ENODEV; } } else if ((tea575x_tuner & TUNER_TYPE_MASK) == 0) { @@ -1251,11 +1252,15 @@ static int __devinit snd_fm801_create(struct snd_card 
*card, } if (tea575x_tuner == 4) { snd_printk(KERN_ERR "TEA575x radio not found\n"); - snd_fm801_free(chip); - return -ENODEV; + chip->tea575x_tuner = TUNER_DISABLED; } } - strlcpy(chip->tea.card, snd_fm801_tea575x_gpios[(tea575x_tuner & TUNER_TYPE_MASK) - 1].name, sizeof(chip->tea.card)); + if (!(chip->tea575x_tuner & TUNER_DISABLED)) { + strlcpy(chip->tea.card, + snd_fm801_tea575x_gpios[(tea575x_tuner & + TUNER_TYPE_MASK) - 1].name, + sizeof(chip->tea.card)); + } #endif *rchip = chip; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3e7850c238c..f3aefef3721 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -579,9 +579,13 @@ int snd_hda_get_conn_index(struct hda_codec *codec, hda_nid_t mux, return -1; } recursive++; - for (i = 0; i < nums; i++) + for (i = 0; i < nums; i++) { + unsigned int type = get_wcaps_type(get_wcaps(codec, conn[i])); + if (type == AC_WID_PIN || type == AC_WID_AUD_OUT) + continue; if (snd_hda_get_conn_index(codec, conn[i], nid, recursive) >= 0) return i; + } return -1; } EXPORT_SYMBOL_HDA(snd_hda_get_conn_index); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index be6982289c0..191284a1c0a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1924,7 +1924,8 @@ static unsigned int azx_via_get_position(struct azx *chip, } static unsigned int azx_get_position(struct azx *chip, - struct azx_dev *azx_dev) + struct azx_dev *azx_dev, + bool with_check) { unsigned int pos; int stream = azx_dev->substream->stream; @@ -1940,7 +1941,7 @@ static unsigned int azx_get_position(struct azx *chip, default: /* use the position buffer */ pos = le32_to_cpu(*azx_dev->posbuf); - if (chip->position_fix[stream] == POS_FIX_AUTO) { + if (with_check && chip->position_fix[stream] == POS_FIX_AUTO) { if (!pos || pos == (u32)-1) { printk(KERN_WARNING "hda-intel: Invalid position buffer, " @@ -1964,7 +1965,7 @@ static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) 
struct azx *chip = apcm->chip; struct azx_dev *azx_dev = get_azx_dev(substream); return bytes_to_frames(substream->runtime, - azx_get_position(chip, azx_dev)); + azx_get_position(chip, azx_dev, false)); } /* @@ -1987,7 +1988,7 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) return -1; /* bogus (too early) interrupt */ stream = azx_dev->substream->stream; - pos = azx_get_position(chip, azx_dev); + pos = azx_get_position(chip, azx_dev, true); if (WARN_ONCE(!azx_dev->period_bytes, "hda-intel: zero azx_dev->period_bytes")) @@ -2369,6 +2370,7 @@ static int azx_dev_free(struct snd_device *device) static struct snd_pci_quirk position_fix_list[] __devinitdata = { SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB), SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1028, 0x02c6, "Dell Inspiron 1010", POS_FIX_LPIB), SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB), diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index d6c93d92b55..c45f3e69bcf 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -535,7 +535,7 @@ static int add_volume(struct hda_codec *codec, const char *name, int index, unsigned int pval, int dir, struct snd_kcontrol **kctlp) { - char tmp[32]; + char tmp[44]; struct snd_kcontrol_new knew = HDA_CODEC_VOLUME_IDX(tmp, index, 0, 0, HDA_OUTPUT); knew.private_value = pval; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 7696d05b935..76752d8ea73 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3110,6 +3110,7 @@ static const struct snd_pci_quirk cxt5066_cfg_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x21c5, "Thinkpad Edge 13", CXT5066_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x21c6, "Thinkpad Edge 13", CXT5066_ASUS), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo Thinkpad", CXT5066_THINKPAD), + 
SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520 & W520", CXT5066_AUTO), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT5066_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT5066_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo U350", CXT5066_ASUS), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7cabd731716..7a73621a890 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -168,7 +168,7 @@ struct alc_spec { unsigned int auto_mic_valid_imux:1; /* valid imux for auto-mic */ unsigned int automute:1; /* HP automute enabled */ unsigned int detect_line:1; /* Line-out detection enabled */ - unsigned int automute_lines:1; /* automute line-out as well */ + unsigned int automute_lines:1; /* automute line-out as well; NOP when automute_hp_lo isn't set */ unsigned int automute_hp_lo:1; /* both HP and LO available */ /* other flags */ @@ -551,7 +551,7 @@ static void update_speakers(struct hda_codec *codec) if (spec->autocfg.line_out_pins[0] == spec->autocfg.hp_pins[0] || spec->autocfg.line_out_pins[0] == spec->autocfg.speaker_pins[0]) return; - if (!spec->automute_lines || !spec->automute) + if (!spec->automute || (spec->automute_hp_lo && !spec->automute_lines)) on = 0; else on = spec->jack_present; @@ -578,6 +578,10 @@ static void alc_line_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; + /* check LO jack only when it's different from HP */ + if (spec->autocfg.line_out_pins[0] == spec->autocfg.hp_pins[0]) + return; + spec->line_jack_present = detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins), spec->autocfg.line_out_pins); @@ -803,7 +807,7 @@ static int alc_automute_mode_get(struct snd_kcontrol *kcontrol, unsigned int val; if (!spec->automute) val = 0; - else if (!spec->automute_lines) + else if (!spec->automute_hp_lo || !spec->automute_lines) val = 1; else val = 2; @@ -824,7 +828,8 @@ static int alc_automute_mode_put(struct snd_kcontrol *kcontrol, spec->automute = 0; 
break; case 1: - if (spec->automute && !spec->automute_lines) + if (spec->automute && + (!spec->automute_hp_lo || !spec->automute_lines)) return 0; spec->automute = 1; spec->automute_lines = 0; @@ -1320,7 +1325,9 @@ do_sku: * 15 : 1 --> enable the function "Mute internal speaker * when the external headphone out jack is plugged" */ - if (!spec->autocfg.hp_pins[0]) { + if (!spec->autocfg.hp_pins[0] && + !(spec->autocfg.line_out_pins[0] && + spec->autocfg.line_out_type == AUTO_PIN_HP_OUT)) { hda_nid_t nid; tmp = (ass >> 11) & 0x3; /* HP to chassis */ if (tmp == 0) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 5145b663ef6..987e3cf71a0 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -5630,6 +5630,7 @@ again: switch (codec->vendor_id) { case 0x111d76d1: case 0x111d76d9: + case 0x111d76df: case 0x111d76e5: case 0x111d7666: case 0x111d7667: @@ -6573,6 +6574,7 @@ static const struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x111d76cc, .name = "92HD89F3", .patch = patch_stac92hd73xx }, { .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx }, { .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx }, + { .id = 0x111d76df, .name = "92HD93BXX", .patch = patch_stac92hd83xxx}, { .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx}, { .id = 0x111d76e3, .name = "92HD98BXX", .patch = patch_stac92hd83xxx}, { .id = 0x111d76e5, .name = "92HD99BXX", .patch = patch_stac92hd83xxx}, diff --git a/sound/soc/blackfin/bf5xx-ad193x.c b/sound/soc/blackfin/bf5xx-ad193x.c index a118a0fb9d8..5956584ea3a 100644 --- a/sound/soc/blackfin/bf5xx-ad193x.c +++ b/sound/soc/blackfin/bf5xx-ad193x.c @@ -103,7 +103,7 @@ static struct snd_soc_dai_link bf5xx_ad193x_dai[] = { .cpu_dai_name = "bfin-tdm.0", .codec_dai_name ="ad193x-hifi", .platform_name = "bfin-tdm-pcm-audio", - .codec_name = "ad193x.5", + .codec_name = "spi0.5", .ops = &bf5xx_ad193x_ops, }, { @@ -112,7 +112,7 @@ static 
struct snd_soc_dai_link bf5xx_ad193x_dai[] = { .cpu_dai_name = "bfin-tdm.1", .codec_dai_name ="ad193x-hifi", .platform_name = "bfin-tdm-pcm-audio", - .codec_name = "ad193x.5", + .codec_name = "spi0.5", .ops = &bf5xx_ad193x_ops, }, }; diff --git a/sound/soc/blackfin/bf5xx-ad73311.c b/sound/soc/blackfin/bf5xx-ad73311.c index 732a247f252..b94eb7ef7d1 100644 --- a/sound/soc/blackfin/bf5xx-ad73311.c +++ b/sound/soc/blackfin/bf5xx-ad73311.c @@ -128,7 +128,7 @@ static int snd_ad73311_configure(void) return 0; } -static int bf5xx_probe(struct platform_device *pdev) +static int bf5xx_probe(struct snd_soc_card *card) { int err; if (gpio_request(GPIO_SE, "AD73311_SE")) { diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 84f4ad56855..9801cd7cfcb 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -431,7 +431,8 @@ static int ssm2602_set_dai_fmt(struct snd_soc_dai *codec_dai, static int ssm2602_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { - u16 reg = snd_soc_read(codec, SSM2602_PWR) & 0xff7f; + u16 reg = snd_soc_read(codec, SSM2602_PWR); + reg &= ~(PWR_POWER_OFF | PWR_OSC_PDN); switch (level) { case SND_SOC_BIAS_ON: diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index ffa2ffe5ec1..aa091a0d818 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1454,8 +1454,8 @@ static int wm8753_probe(struct snd_soc_codec *codec) /* set the update bits */ snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100); snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100); + snd_soc_update_bits(codec, WM8753_LADC, 0x0100, 0x0100); + snd_soc_update_bits(codec, WM8753_RADC, 0x0100, 0x0100); snd_soc_update_bits(codec, WM8753_LOUT1V, 0x0100, 0x0100); snd_soc_update_bits(codec, WM8753_ROUT1V, 0x0100, 0x0100); snd_soc_update_bits(codec, WM8753_LOUT2V, 0x0100, 0x0100); 
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 1725550c293..d2c315fa1b9 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3479,31 +3479,6 @@ int wm8962_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack) } EXPORT_SYMBOL_GPL(wm8962_mic_detect); -#ifdef CONFIG_PM -static int wm8962_resume(struct snd_soc_codec *codec) -{ - u16 *reg_cache = codec->reg_cache; - int i; - - /* Restore the registers */ - for (i = 1; i < codec->driver->reg_cache_size; i++) { - switch (i) { - case WM8962_SOFTWARE_RESET: - continue; - default: - break; - } - - if (reg_cache[i] != wm8962_reg[i]) - snd_soc_write(codec, i, reg_cache[i]); - } - - return 0; -} -#else -#define wm8962_resume NULL -#endif - #if defined(CONFIG_INPUT) || defined(CONFIG_INPUT_MODULE) static int beep_rates[] = { 500, 1000, 2000, 4000, @@ -4015,7 +3990,6 @@ static int wm8962_remove(struct snd_soc_codec *codec) static struct snd_soc_codec_driver soc_codec_dev_wm8962 = { .probe = wm8962_probe, .remove = wm8962_remove, - .resume = wm8962_resume, .set_bias_level = wm8962_set_bias_level, .reg_cache_size = WM8962_MAX_REGISTER + 1, .reg_word_size = sizeof(u16), diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index fd0dc46afc3..5c6c2457386 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -369,7 +369,7 @@ static struct snd_soc_platform_driver mpc5200_audio_dma_platform = { .pcm_free = &psc_dma_free, }; -static int mpc5200_hpcd_probe(struct of_device *op) +static int mpc5200_hpcd_probe(struct platform_device *op) { phys_addr_t fifo; struct psc_dma *psc_dma; @@ -487,7 +487,7 @@ out_unmap: return ret; } -static int mpc5200_hpcd_remove(struct of_device *op) +static int mpc5200_hpcd_remove(struct platform_device *op) { struct psc_dma *psc_dma = dev_get_drvdata(&op->dev); @@ -519,7 +519,7 @@ MODULE_DEVICE_TABLE(of, mpc5200_hpcd_match); static struct platform_driver mpc5200_hpcd_of_driver = { .probe = mpc5200_hpcd_probe, 
.remove = mpc5200_hpcd_remove, - .dev = { + .driver = { .owner = THIS_MODULE, .name = "mpc5200-pcm-audio", .of_match_table = mpc5200_hpcd_match, diff --git a/sound/soc/imx/imx-pcm-fiq.c b/sound/soc/imx/imx-pcm-fiq.c index 309c59e6fb6..7945625e0e0 100644 --- a/sound/soc/imx/imx-pcm-fiq.c +++ b/sound/soc/imx/imx-pcm-fiq.c @@ -240,7 +240,6 @@ static int ssi_irq = 0; static int imx_pcm_fiq_new(struct snd_soc_pcm_runtime *rtd) { - struct snd_card *card = rtd->card->snd_card; struct snd_soc_dai *dai = rtd->cpu_dai; struct snd_pcm *pcm = rtd->pcm; int ret; diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 8f16cd37c2a..d0bcf3fcea0 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev) if (!priv->mem) { dev_err(&pdev->dev, "request_mem_region failed\n"); err = -EBUSY; - goto error_alloc; + goto err_alloc; } priv->io = ioremap(priv->mem->start, SZ_16K); diff --git a/sound/soc/omap/mcpdm.c b/sound/soc/omap/mcpdm.c index 928f0370745..50e59194ad8 100644 --- a/sound/soc/omap/mcpdm.c +++ b/sound/soc/omap/mcpdm.c @@ -449,7 +449,7 @@ exit: return ret; } -int __devexit omap_mcpdm_remove(struct platform_device *pdev) +int omap_mcpdm_remove(struct platform_device *pdev) { struct omap_mcpdm *mcpdm_ptr = platform_get_drvdata(pdev); diff --git a/sound/soc/omap/mcpdm.h b/sound/soc/omap/mcpdm.h index df3e16fb51f..20c20a8649f 100644 --- a/sound/soc/omap/mcpdm.h +++ b/sound/soc/omap/mcpdm.h @@ -150,4 +150,4 @@ extern int omap_mcpdm_request(void); extern void omap_mcpdm_free(void); extern int omap_mcpdm_set_offset(int offset1, int offset2); int __devinit omap_mcpdm_probe(struct platform_device *pdev); -int __devexit omap_mcpdm_remove(struct platform_device *pdev); +int omap_mcpdm_remove(struct platform_device *pdev); diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index ebcc2d4d2b1..478d6077845 100644 --- 
a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -516,6 +516,12 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai, struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs; int err = 0; + if (mcbsp_data->active) + if (freq == mcbsp_data->in_freq) + return 0; + else + return -EBUSY; + /* The McBSP signal muxing functions are only available on McBSP1 */ if (clk_id == OMAP_MCBSP_CLKR_SRC_CLKR || clk_id == OMAP_MCBSP_CLKR_SRC_CLKX || diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c index b6445757fc5..2b8350b5223 100644 --- a/sound/soc/pxa/zylonite.c +++ b/sound/soc/pxa/zylonite.c @@ -196,20 +196,20 @@ static int zylonite_probe(struct snd_soc_card *card) if (clk_pout) { pout = clk_get(NULL, "CLK_POUT"); if (IS_ERR(pout)) { - dev_err(&pdev->dev, "Unable to obtain CLK_POUT: %ld\n", + dev_err(card->dev, "Unable to obtain CLK_POUT: %ld\n", PTR_ERR(pout)); return PTR_ERR(pout); } ret = clk_enable(pout); if (ret != 0) { - dev_err(&pdev->dev, "Unable to enable CLK_POUT: %d\n", + dev_err(card->dev, "Unable to enable CLK_POUT: %d\n", ret); clk_put(pout); return ret; } - dev_dbg(&pdev->dev, "MCLK enabled at %luHz\n", + dev_dbg(card->dev, "MCLK enabled at %luHz\n", clk_get_rate(pout)); } @@ -241,7 +241,7 @@ static int zylonite_resume_pre(struct snd_soc_card *card) if (clk_pout) { ret = clk_enable(pout); if (ret != 0) - dev_err(&pdev->dev, "Unable to enable CLK_POUT: %d\n", + dev_err(card->dev, "Unable to enable CLK_POUT: %d\n", ret); } diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c index d9f8aded51f..20b7f3b003a 100644 --- a/sound/soc/soc-cache.c +++ b/sound/soc/soc-cache.c @@ -203,14 +203,14 @@ static int snd_soc_rbtree_cache_sync(struct snd_soc_codec *codec) rbnode = rb_entry(node, struct snd_soc_rbtree_node, node); for (i = 0; i < rbnode->blklen; ++i) { regtmp = rbnode->base_reg + i; - WARN_ON(codec->writable_register && - codec->writable_register(codec, regtmp)); val = snd_soc_rbtree_get_register(rbnode, i); def = 
snd_soc_get_cache_val(codec->reg_def_copy, i, rbnode->word_size); if (val == def) continue; + WARN_ON(!snd_soc_codec_writable_register(codec, regtmp)); + codec->cache_bypass = 1; ret = snd_soc_write(codec, regtmp, val); codec->cache_bypass = 0; @@ -563,8 +563,7 @@ static int snd_soc_lzo_cache_sync(struct snd_soc_codec *codec) lzo_blocks = codec->reg_cache; for_each_set_bit(i, lzo_blocks[0]->sync_bmp, lzo_blocks[0]->sync_bmp_nbits) { - WARN_ON(codec->writable_register && - codec->writable_register(codec, i)); + WARN_ON(!snd_soc_codec_writable_register(codec, i)); ret = snd_soc_cache_read(codec, i, &val); if (ret) return ret; @@ -823,8 +822,6 @@ static int snd_soc_flat_cache_sync(struct snd_soc_codec *codec) codec_drv = codec->driver; for (i = 0; i < codec_drv->reg_cache_size; ++i) { - WARN_ON(codec->writable_register && - codec->writable_register(codec, i)); ret = snd_soc_cache_read(codec, i, &val); if (ret) return ret; @@ -832,6 +829,9 @@ static int snd_soc_flat_cache_sync(struct snd_soc_codec *codec) if (snd_soc_get_cache_val(codec->reg_def_copy, i, codec_drv->reg_word_size) == val) continue; + + WARN_ON(!snd_soc_codec_writable_register(codec, i)); + ret = snd_soc_write(codec, i, val); if (ret) return ret; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index b085d8e8757..ef69f5a0270 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -30,6 +30,7 @@ #include <linux/bitops.h> #include <linux/debugfs.h> #include <linux/platform_device.h> +#include <linux/ctype.h> #include <linux/slab.h> #include <sound/ac97_codec.h> #include <sound/core.h> @@ -1434,9 +1435,20 @@ static void snd_soc_instantiate_card(struct snd_soc_card *card) "%s", card->name); snprintf(card->snd_card->longname, sizeof(card->snd_card->longname), "%s", card->long_name ? 
card->long_name : card->name); - if (card->driver_name) - strlcpy(card->snd_card->driver, card->driver_name, - sizeof(card->snd_card->driver)); + snprintf(card->snd_card->driver, sizeof(card->snd_card->driver), + "%s", card->driver_name ? card->driver_name : card->name); + for (i = 0; i < ARRAY_SIZE(card->snd_card->driver); i++) { + switch (card->snd_card->driver[i]) { + case '_': + case '-': + case '\0': + break; + default: + if (!isalnum(card->snd_card->driver[i])) + card->snd_card->driver[i] = '_'; + break; + } + } if (card->late_probe) { ret = card->late_probe(card); @@ -1633,7 +1645,7 @@ int snd_soc_codec_readable_register(struct snd_soc_codec *codec, if (codec->readable_register) return codec->readable_register(codec, reg); else - return 0; + return 1; } EXPORT_SYMBOL_GPL(snd_soc_codec_readable_register); @@ -1651,7 +1663,7 @@ int snd_soc_codec_writable_register(struct snd_soc_codec *codec, if (codec->writable_register) return codec->writable_register(codec, reg); else - return 0; + return 1; } EXPORT_SYMBOL_GPL(snd_soc_codec_writable_register); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7e15914b363..d67c637557a 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2763,7 +2763,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_ignore_suspend); /** * snd_soc_dapm_free - free dapm resources - * @card: SoC device + * @dapm: DAPM context * * Free all dapm widgets and resources. 
*/ diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 38b00131b2f..fa31d9c2abd 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -105,7 +105,7 @@ void snd_soc_jack_report(struct snd_soc_jack *jack, int status, int mask) snd_soc_dapm_sync(dapm); - snd_jack_report(jack->jack, status); + snd_jack_report(jack->jack, jack->status); out: mutex_unlock(&codec->mutex); diff --git a/sound/usb/card.c b/sound/usb/card.c index 781d9e61adf..d8f2bf40145 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -530,8 +530,11 @@ snd_usb_audio_probe(struct usb_device *dev, return chip; __error: - if (chip && !chip->num_interfaces) - snd_card_free(chip->card); + if (chip) { + if (!chip->num_interfaces) + snd_card_free(chip->card); + chip->probing = 0; + } mutex_unlock(&register_mutex); __err_val: return NULL; diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3b8f7b80376..e9d5c271db6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -30,6 +30,8 @@ endif # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. # # Define NO_DWARF if you do not want debug-info analysis feature at all. +# +# Define WERROR=0 to disable treating any warnings as errors. 
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -63,6 +65,11 @@ ifeq ($(ARCH),x86_64) endif endif +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS_WERROR := -Werror +endif + # # Include saner warnings here, which can catch bugs: # @@ -95,7 +102,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6b0519f885e..f4c3fbee4ba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -161,6 +161,7 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ + attr->disabled = 1; attr->inherit = !no_inherit; attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | @@ -671,6 +672,8 @@ static int __cmd_record(int argc, const char **argv) } } + perf_evlist__enable(evsel_list); + /* * Let the child rip */ diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 55f4c76f282..efe696f936e 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -561,7 +561,7 @@ static int test__basic_mmap(void) } err = perf_event__parse_sample(event, attr.sample_type, sample_size, - false, &sample); + false, &sample, false); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a43433f0830..d28013b7d61 100644 --- 
a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -191,7 +191,8 @@ static void __zero_source_counters(struct sym_entry *syme) symbol__annotate_zero_histograms(sym); } -static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) +static void record_precise_ip(struct sym_entry *syme, struct map *map, + int counter, u64 ip) { struct annotation *notes; struct symbol *sym; @@ -205,8 +206,8 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) if (pthread_mutex_trylock(&notes->lock)) return; - ip = syme->map->map_ip(syme->map, ip); - symbol__inc_addr_samples(sym, syme->map, counter, ip); + ip = map->map_ip(map, ip); + symbol__inc_addr_samples(sym, map, counter, ip); pthread_mutex_unlock(&notes->lock); } @@ -810,7 +811,7 @@ static void perf_event__process_sample(const union perf_event *event, evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); syme->count[evsel->idx]++; - record_precise_ip(syme, evsel->idx, ip); + record_precise_ip(syme, al.map, evsel->idx, ip); pthread_mutex_lock(&top.active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) { static bool first = true; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3c1b8a63210..437f8ca679a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -169,12 +169,17 @@ static int perf_event__synthesize_mmap_events(union perf_event *event, continue; pbf += n + 3; if (*pbf == 'x') { /* vm_exec */ + char anonstr[] = "//anon\n"; char *execname = strchr(bf, '/'); /* Catch VDSO */ if (execname == NULL) execname = strstr(bf, "[vdso]"); + /* Catch anonymous mmaps */ + if ((execname == NULL) && !strstr(bf, "[")) + execname = anonstr; + if (execname == NULL) continue; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1d7f66488a8..357a85b8524 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -186,6 +186,6 @@ const char *perf_event__name(unsigned int id); int 
perf_event__parse_sample(const union perf_event *event, u64 type, int sample_size, bool sample_id_all, - struct perf_sample *sample); + struct perf_sample *sample, bool swapped); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c12bd476c6f..72e9f4886b6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -113,6 +113,19 @@ void perf_evlist__disable(struct perf_evlist *evlist) } } +void perf_evlist__enable(struct perf_evlist *evlist) +{ + int cpu, thread; + struct perf_evsel *pos; + + for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + list_for_each_entry(pos, &evlist->entries, node) { + for (thread = 0; thread < evlist->threads->nr; thread++) + ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + } + } +} + int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index ce85ae9ae57..f3491500274 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); void perf_evlist__disable(struct perf_evlist *evlist); +void perf_evlist__enable(struct perf_evlist *evlist); static inline void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a03a36b7908..e389815078d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -7,6 +7,8 @@ * Released under the GPL v2. 
(and only v2, not any later version) */ +#include <byteswap.h> +#include "asm/bug.h" #include "evsel.h" #include "evlist.h" #include "util.h" @@ -342,10 +344,20 @@ static bool sample_overlap(const union perf_event *event, int perf_event__parse_sample(const union perf_event *event, u64 type, int sample_size, bool sample_id_all, - struct perf_sample *data) + struct perf_sample *data, bool swapped) { const u64 *array; + /* + * used for cross-endian analysis. See git commit 65014ab3 + * for why this goofiness is needed. + */ + union { + u64 val64; + u32 val32[2]; + } u; + + data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; @@ -366,9 +378,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - data->pid = p[0]; - data->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + data->pid = u.val32[0]; + data->tid = u.val32[1]; array++; } @@ -395,8 +414,15 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - data->cpu = *p; + + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + data->cpu = u.val32[0]; array++; } @@ -423,18 +449,27 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_RAW) { - u32 *p = (u32 *)array; + const u64 *pdata; + + u.val64 = *array; + if (WARN_ONCE(swapped, + "Endianness of raw data not corrected!\n")) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } if (sample_overlap(event, array, sizeof(u32))) return -EFAULT; - data->raw_size 
= *p; - p++; + data->raw_size = u.val32[0]; + pdata = (void *) array + sizeof(u32); - if (sample_overlap(event, p, data->raw_size)) + if (sample_overlap(event, pdata, data->raw_size)) return -EFAULT; - data->raw_data = p; + data->raw_data = (void *) pdata; } return 0; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 555fc3864b9..5d732621a46 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -659,7 +659,7 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) ret = -ENOENT; } - if (ret == 0) + if (ret >= 0) ret = convert_variable(&vr_die, pf); if (ret < 0) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index cbc8f215d4b..7624324efad 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -803,7 +803,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, first = list_entry(evlist->entries.next, struct perf_evsel, node); err = perf_event__parse_sample(event, first->attr.sample_type, perf_evsel__sample_size(first), - sample_id_all, &pevent->sample); + sample_id_all, &pevent->sample, false); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 170601e67d6..974d0cbee5e 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -162,7 +162,8 @@ static inline int perf_session__parse_sample(struct perf_session *session, { return perf_event__parse_sample(event, session->sample_type, session->sample_size, - session->sample_id_all, sample); + session->sample_id_all, sample, + session->header.needs_swap); } struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 401e220566f..1ee8f1e40f1 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -151,11 
+151,17 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { u64 ip_l, ip_r; + if (!left->ms.sym && !right->ms.sym) + return right->level - left->level; + + if (!left->ms.sym || !right->ms.sym) + return cmp_null(left->ms.sym, right->ms.sym); + if (left->ms.sym == right->ms.sym) return 0; - ip_l = left->ms.sym ? left->ms.sym->start : left->ip; - ip_r = right->ms.sym ? right->ms.sym->start : right->ip; + ip_l = left->ms.sym->start; + ip_r = right->ms.sym->start; return (int64_t)(ip_r - ip_l); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 469c0264ed2..40eeaf07725 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -74,16 +74,104 @@ static void dso__set_sorted_by_name(struct dso *dso, enum map_type type) bool symbol_type__is_a(char symbol_type, enum map_type map_type) { + symbol_type = toupper(symbol_type); + switch (map_type) { case MAP__FUNCTION: return symbol_type == 'T' || symbol_type == 'W'; case MAP__VARIABLE: - return symbol_type == 'D' || symbol_type == 'd'; + return symbol_type == 'D'; default: return false; } } +static int prefix_underscores_count(const char *str) +{ + const char *tail = str; + + while (*tail == '_') + tail++; + + return tail - str; +} + +#define SYMBOL_A 0 +#define SYMBOL_B 1 + +static int choose_best_symbol(struct symbol *syma, struct symbol *symb) +{ + s64 a; + s64 b; + + /* Prefer a symbol with non zero length */ + a = syma->end - syma->start; + b = symb->end - symb->start; + if ((b == 0) && (a > 0)) + return SYMBOL_A; + else if ((a == 0) && (b > 0)) + return SYMBOL_B; + + /* Prefer a non weak symbol over a weak one */ + a = syma->binding == STB_WEAK; + b = symb->binding == STB_WEAK; + if (b && !a) + return SYMBOL_A; + if (a && !b) + return SYMBOL_B; + + /* Prefer a global symbol over a non global one */ + a = syma->binding == STB_GLOBAL; + b = symb->binding == STB_GLOBAL; + if (a && !b) + return SYMBOL_A; + if (b && !a) + return SYMBOL_B; + + /* Prefer a symbol with less 
underscores */ + a = prefix_underscores_count(syma->name); + b = prefix_underscores_count(symb->name); + if (b > a) + return SYMBOL_A; + else if (a > b) + return SYMBOL_B; + + /* If all else fails, choose the symbol with the longest name */ + if (strlen(syma->name) >= strlen(symb->name)) + return SYMBOL_A; + else + return SYMBOL_B; +} + +static void symbols__fixup_duplicate(struct rb_root *symbols) +{ + struct rb_node *nd; + struct symbol *curr, *next; + + nd = rb_first(symbols); + + while (nd) { + curr = rb_entry(nd, struct symbol, rb_node); +again: + nd = rb_next(&curr->rb_node); + next = rb_entry(nd, struct symbol, rb_node); + + if (!nd) + break; + + if (curr->start != next->start) + continue; + + if (choose_best_symbol(curr, next) == SYMBOL_A) { + rb_erase(&next->rb_node, symbols); + goto again; + } else { + nd = rb_next(&curr->rb_node); + rb_erase(&curr->rb_node, symbols); + } + } +} + static void symbols__fixup_end(struct rb_root *symbols) { struct rb_node *nd, *prevnd = rb_first(symbols); @@ -438,18 +526,11 @@ int kallsyms__parse(const char *filename, void *arg, char *line = NULL; size_t n; int err = -1; - u64 prev_start = 0; - char prev_symbol_type = 0; - char *prev_symbol_name; FILE *file = fopen(filename, "r"); if (file == NULL) goto out_failure; - prev_symbol_name = malloc(KSYM_NAME_LEN); - if (prev_symbol_name == NULL) - goto out_close; - err = 0; while (!feof(file)) { @@ -470,7 +551,7 @@ int kallsyms__parse(const char *filename, void *arg, if (len + 2 >= line_len) continue; - symbol_type = toupper(line[len]); + symbol_type = line[len]; len += 2; symbol_name = line + len; len = line_len - len; @@ -480,24 +561,18 @@ int kallsyms__parse(const char *filename, void *arg, break; } - if (prev_symbol_type) { - u64 end = start; - if (end != prev_start) - --end; - err = process_symbol(arg, prev_symbol_name, - prev_symbol_type, prev_start, end); - if (err) - break; - } - - memcpy(prev_symbol_name, symbol_name, len + 1); - prev_symbol_type = symbol_type; - 
prev_start = start; + /* + * module symbols are not sorted so we add all + * symbols with zero length and rely on + * symbols__fixup_end() to fix it up. + */ + err = process_symbol(arg, symbol_name, + symbol_type, start, start); + if (err) + break; } - free(prev_symbol_name); free(line); -out_close: fclose(file); return err; @@ -703,6 +778,9 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols[map->type]); + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = SYMTAB__GUEST_KALLSYMS; else @@ -1092,8 +1170,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; - if (elf_read_build_id(elf, build_id, - BUILD_ID_SIZE) != BUILD_ID_SIZE) + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) goto out_elf_end; if (!dso__build_id_equal(dso, build_id)) @@ -1111,6 +1188,8 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, } opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); + if (opdshdr.sh_type != SHT_PROGBITS) + opdsec = NULL; if (opdsec) opddata = elf_rawdata(opdsec, NULL); @@ -1276,6 +1355,7 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. 
*/ if (nr > 0) { + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* @@ -1362,8 +1442,8 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) ptr = data->d_buf; while (ptr < (data->d_buf + data->d_size)) { GElf_Nhdr *nhdr = ptr; - int namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); + size_t namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); const char *name; ptr += sizeof(*nhdr); @@ -1372,8 +1452,10 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) if (nhdr->n_type == NT_GNU_BUILD_ID && nhdr->n_namesz == sizeof("GNU")) { if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - memcpy(bf, ptr, BUILD_ID_SIZE); - err = BUILD_ID_SIZE; + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + err = descsz; break; } } @@ -1425,7 +1507,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) while (1) { char bf[BUFSIZ]; GElf_Nhdr nhdr; - int namesz, descsz; + size_t namesz, descsz; if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) break; @@ -1434,15 +1516,16 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) descsz = NOTE_ALIGN(nhdr.n_descsz); if (nhdr.n_type == NT_GNU_BUILD_ID && nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != namesz) + if (read(fd, bf, namesz) != (ssize_t)namesz) break; if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - if (read(fd, build_id, - BUILD_ID_SIZE) == BUILD_ID_SIZE) { + size_t sz = min(descsz, size); + if (read(fd, build_id, sz) == (ssize_t)sz) { + memset(build_id + sz, 0, size - sz); err = 0; break; } - } else if (read(fd, bf, descsz) != descsz) + } else if (read(fd, bf, descsz) != (ssize_t)descsz) break; } else { int n = namesz + descsz; |