diff options
491 files changed, 11367 insertions, 4324 deletions
@@ -514,6 +514,11 @@ S: Bessemerstraat 21 S: Amsterdam S: The Netherlands +N: NeilBrown +E: neil@brown.name +P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F 497A 39EC 9EDD 5662 81B9 +D: NFSD Maintainer 2000-2007 + N: Zach Brown E: zab@zabbo.net D: maestro pci sound diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab index 8b093f8222d..91bd6ca5440 100644 --- a/Documentation/ABI/testing/sysfs-kernel-slab +++ b/Documentation/ABI/testing/sysfs-kernel-slab @@ -346,6 +346,10 @@ Description: number of objects per slab. If a slab cannot be allocated because of fragmentation, SLUB will retry with the minimum order possible depending on its characteristics. + When debug_guardpage_minorder=N (N > 0) parameter is specified + (see Documentation/kernel-parameters.txt), the minimum possible + order is used and this sysfs entry can not be used to change + the order at run time. What: /sys/kernel/slab/cache/order_fallback Date: April 2008 diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 4d8774f6f48..4c95c0034a4 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -61,7 +61,7 @@ Brief summary of control files. memory.failcnt # show the number of memory usage hits limits memory.memsw.failcnt # show the number of memory+Swap hits limits memory.max_usage_in_bytes # show max memory usage recorded - memory.memsw.usage_in_bytes # show max memory+Swap usage recorded + memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded memory.soft_limit_in_bytes # set/show soft limit of memory usage memory.stat # show various statistics memory.use_hierarchy # set/show hierarchical account enabled @@ -410,8 +410,11 @@ memory.stat file includes following statistics cache - # of bytes of page cache memory. rss - # of bytes of anonymous and swap cache memory. mapped_file - # of bytes of mapped file (includes tmpfs/shmem) -pgpgin - # of pages paged in (equivalent to # of charging events). -pgpgout - # of pages paged out (equivalent to # of uncharging events). +pgpgin - # of charging events to the memory cgroup. The charging + event happens each time a page is accounted as either mapped + anon page(RSS) or cache page(Page Cache) to the cgroup. +pgpgout - # of uncharging events to the memory cgroup. The uncharging + event happens each time a page is unaccounted from the cgroup. swap - # of bytes of swap usage inactive_anon - # of bytes of anonymous memory and swap cache memory on LRU list. diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt new file mode 100644 index 00000000000..19f6af47a79 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt @@ -0,0 +1,78 @@ +* Freescale MC13783/MC13892 Power Management Integrated Circuit (PMIC) + +Required properties: +- compatible : Should be "fsl,mc13783" or "fsl,mc13892" + +Optional properties: +- fsl,mc13xxx-uses-adc : Indicate the ADC is being used +- fsl,mc13xxx-uses-codec : Indicate the Audio Codec is being used +- fsl,mc13xxx-uses-rtc : Indicate the RTC is being used +- fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used + +Sub-nodes: +- regulators : Contain the regulator nodes. The MC13892 regulators are + bound using their names as listed below with their registers and bits + for enabling. + + vcoincell : regulator VCOINCELL (register 13, bit 23) + sw1 : regulator SW1 (register 24, bit 0) + sw2 : regulator SW2 (register 25, bit 0) + sw3 : regulator SW3 (register 26, bit 0) + sw4 : regulator SW4 (register 27, bit 0) + swbst : regulator SWBST (register 29, bit 20) + vgen1 : regulator VGEN1 (register 32, bit 0) + viohi : regulator VIOHI (register 32, bit 3) + vdig : regulator VDIG (register 32, bit 9) + vgen2 : regulator VGEN2 (register 32, bit 12) + vpll : regulator VPLL (register 32, bit 15) + vusb2 : regulator VUSB2 (register 32, bit 18) + vgen3 : regulator VGEN3 (register 33, bit 0) + vcam : regulator VCAM (register 33, bit 6) + vvideo : regulator VVIDEO (register 33, bit 12) + vaudio : regulator VAUDIO (register 33, bit 15) + vsd : regulator VSD (register 33, bit 18) + gpo1 : regulator GPO1 (register 34, bit 6) + gpo2 : regulator GPO2 (register 34, bit 8) + gpo3 : regulator GPO3 (register 34, bit 10) + gpo4 : regulator GPO4 (register 34, bit 12) + pwgt1spi : regulator PWGT1SPI (register 34, bit 15) + pwgt2spi : regulator PWGT2SPI (register 34, bit 16) + vusb : regulator VUSB (register 50, bit 3) + + The bindings details of individual regulator device can be found in: + Documentation/devicetree/bindings/regulator/regulator.txt + +Examples: + +ecspi@70010000 { /* ECSPI1 */ + fsl,spi-num-chipselects = <2>; + cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */ + <&gpio3 25 0>; /* GPIO4_25 */ + status = "okay"; + + pmic: mc13892@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mc13892"; + spi-max-frequency = <6000000>; + reg = <0>; + interrupt-parent = <&gpio0>; + interrupts = <8>; + + regulators { + sw1_reg: mc13892__sw1 { + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1375000>; + regulator-boot-on; + regulator-always-on; + }; + + sw2_reg: mc13892__sw2 { + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <1850000>; + regulator-boot-on; + regulator-always-on; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/mfd/twl-familly.txt b/Documentation/devicetree/bindings/mfd/twl-familly.txt new file mode 100644 index 00000000000..a66fcf94675 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/twl-familly.txt @@ -0,0 +1,47 @@ +Texas Instruments TWL family + +The TWLs are Integrated Power Management Chips. +Some version might contain much more analog function like +USB transceiver or Audio amplifier. +These chips are connected to an i2c bus. + + +Required properties: +- compatible : Must be "ti,twl4030"; + For Integrated power-management/audio CODEC device used in OMAP3 + based boards +- compatible : Must be "ti,twl6030"; + For Integrated power-management used in OMAP4 based boards +- interrupts : This i2c device has an IRQ line connected to the main SoC +- interrupt-controller : Since the twl support several interrupts internally, + it is considered as an interrupt controller cascaded to the SoC one. +- #interrupt-cells = <1>; +- interrupt-parent : The parent interrupt controller. + +Optional node: +- Child nodes contain in the twl. The twl family is made of several variants + that support a different number of features. + The children nodes will thus depend of the capability of the variant. + + +Example: +/* + * Integrated Power Management Chip + * http://www.ti.com/lit/ds/symlink/twl6030.pdf + */ +twl@48 { + compatible = "ti,twl6030"; + reg = <0x48>; + interrupts = <39>; /* IRQ_SYS_1N cascaded to gic */ + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <0>; + + twl_rtc { + compatible = "ti,twl_rtc"; + interrupts = <11>; + reg = <0>; + }; +}; diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt index 510eab32f39..225f96d88f5 100644 --- a/Documentation/dma-buf-sharing.txt +++ b/Documentation/dma-buf-sharing.txt @@ -219,6 +219,10 @@ NOTES: If the exporter chooses not to allow an attach() operation once a map_dma_buf() API has been called, it simply returns an error. +Miscellaneous notes: +- Any exporters or users of the dma-buf buffer sharing framework must have + a 'select DMA_SHARED_BUFFER' in their respective Kconfigs. + References: [1] struct dma_buf_ops in include/linux/dma-buf.h [2] All interfaces mentioned above defined in include/linux/dma-buf.h diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 763d8ebbbeb..d6030aa3337 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -119,12 +119,20 @@ Mount Options must rely on TCP's error correction to detect data corruption in the data payload. - noasyncreaddir - Disable client's use its local cache to satisfy readdir - requests. (This does not change correctness; the client uses - cached metadata only when a lease or capability ensures it is - valid.) + dcache + Use the dcache contents to perform negative lookups and + readdir when the client has the entire directory contents in + its cache. (This does not change correctness; the client uses + cached metadata only when a lease or capability ensures it is + valid.) + + nodcache + Do not use the dcache as above. This avoids a significant amount of + complex code, sacrificing performance without affecting correctness, + and is useful for tracking down bugs. + noasyncreaddir + Do not use the dcache as above for readdir. More Information ================ diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index a57e12411d2..1716874a651 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -2,6 +2,8 @@ - this file (nfs-related documentation). Exporting - explanation of how to make filesystems exportable. +fault_injection.txt + - information for using fault injection on the server knfsd-stats.txt - statistics which the NFS server makes available to user space. nfs.txt diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt new file mode 100644 index 00000000000..426d166089a --- /dev/null +++ b/Documentation/filesystems/nfs/fault_injection.txt @@ -0,0 +1,69 @@ + +Fault Injection +=============== +Fault injection is a method for forcing errors that may not normally occur, or +may be difficult to reproduce. Forcing these errors in a controlled environment +can help the developer find and fix bugs before their code is shipped in a +production system. Injecting an error on the Linux NFS server will allow us to +observe how the client reacts and if it manages to recover its state correctly. + +NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this +feature. + + +Using Fault Injection +===================== +On the client, mount the fault injection server through NFS v4.0+ and do some +work over NFS (open files, take locks, ...). + +On the server, mount the debugfs filesystem to <debug_dir> and ls +<debug_dir>/nfsd. This will show a list of files that will be used for +injecting faults on the NFS server. As root, write a number n to the file +corresponding to the action you want the server to take. The server will then +process the first n items it finds. So if you want to forget 5 locks, echo '5' +to <debug_dir>/nfsd/forget_locks. A value of 0 will tell the server to forget +all corresponding items. A log message will be created containing the number +of items forgotten (check dmesg). + +Go back to work on the client and check if the client recovered from the error +correctly. + + +Available Faults +================ +forget_clients: + The NFS server keeps a list of clients that have placed a mount call. If + this list is cleared, the server will have no knowledge of who the client + is, forcing the client to reauthenticate with the server. + +forget_openowners: + The NFS server keeps a list of what files are currently opened and who + they were opened by. Clearing this list will force the client to reopen + its files. + +forget_locks: + The NFS server keeps a list of what files are currently locked in the VFS. + Clearing this list will force the client to reclaim its locks (files are + unlocked through the VFS as they are cleared from this list). + +forget_delegations: + A delegation is used to assure the client that a file, or part of a file, + has not changed since the delegation was awarded. Clearing this list will + force the client to reaquire its delegation before accessing the file + again. + +recall_delegations: + Delegations can be recalled by the server when another client attempts to + access a file. This test will notify the client that its delegation has + been revoked, forcing the client to reaquire the delegation before using + the file again. + + +tools/nfs/inject_faults.sh script +================================= +This script has been created to ease the fault injection process. This script +will detect the mounted debugfs directory and write to the files located there +based on the arguments passed by the user. For example, running +`inject_faults.sh forget_locks 1` as root will instruct the server to forget +one lock. Running `inject_faults forget_locks` will instruct the server to +forgetall locks. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 12fee132fbe..a76a26a1db8 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -307,6 +307,9 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) blkio_ticks time spent waiting for block IO gtime guest time of the task in jiffies cgtime guest time of the task children in jiffies + start_data address above which program data+bss is placed + end_data address below which program data+bss is placed + start_brk address above which program heap can be expanded with brk() .............................................................................. The /proc/PID/maps file containing the currently mapped memory regions and diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 7db3ebda5a4..403c090aca3 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -93,8 +93,8 @@ byte alignment: Compressed data blocks are written to the filesystem as files are read from the source directory, and checked for duplicates. Once all file data has been -written the completed inode, directory, fragment, export and uid/gid lookup -tables are written. +written the completed inode, directory, fragment, export, uid/gid lookup and +xattr tables are written. 3.1 Compression options ----------------------- @@ -151,7 +151,7 @@ in each metadata block. Directories are sorted in alphabetical order, and at lookup the index is scanned linearly looking for the first filename alphabetically larger than the filename being looked up. At this point the location of the metadata block the filename is in has been found. -The general idea of the index is ensure only one metadata block needs to be +The general idea of the index is to ensure only one metadata block needs to be decompressed to do a lookup irrespective of the length of the directory. This scheme has the advantage that it doesn't require extra memory overhead and doesn't require much extra storage on disk. diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index 8898a95b41e..22ae8441489 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -64,3 +64,13 @@ Note on Erase Size and Preferred Erase Size: size specified by the card. "preferred_erase_size" is in bytes. + +SD/MMC/SDIO Clock Gating Attribute +================================== + +Read and write access is provided to following attribute. +This attribute appears only if CONFIG_MMC_CLKGATE is enabled. + + clkgate_delay Tune the clock gating delay with desired value in milliseconds. + +echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt index 2db28b8e662..f08d078d43c 100644 --- a/Documentation/mmc/mmc-dev-parts.txt +++ b/Documentation/mmc/mmc-dev-parts.txt @@ -25,3 +25,16 @@ echo 0 > /sys/block/mmcblkXbootY/force_ro To re-enable read-only access: echo 1 > /sys/block/mmcblkXbootY/force_ro + +The boot partitions can also be locked read only until the next power on, +with: + +echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on + +This is a feature of the card and not of the kernel. If the card does +not support boot partition locking, the file will not exist. If the +feature has been disabled on the card, the file will be read-only. + +The boot partitions can also be locked permanently, but this feature is +not accessible through sysfs in order to avoid accidental or malicious +bricking. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d8cd8b2c30..8c20fbd8b42 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -415,6 +415,14 @@ PIDs of value pid_max or larger are not allocated. ============================================================== +ns_last_pid: + +The last pid allocated in the current (the one task using this sysctl +lives in) pid namespace. When selecting a pid for a next task on fork +kernel tries to allocate a number starting from this one. + +============================================================== + powersave-nap: (PPC only) If set, Linux-PPC will use the 'nap' mode of powersaving, diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index 2acdda9601b..6752870c497 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -131,7 +131,10 @@ slub_min_objects. slub_max_order specified the order at which slub_min_objects should no longer be checked. This is useful to avoid SLUB trying to generate super large order pages to fit slub_min_objects of a slab cache with -large object sizes into one high order page. +large object sizes into one high order page. Setting command line +parameter debug_guardpage_minorder=N (N > 0), forces setting +slub_max_order to 0, what cause minimum possible order of slabs +allocation. SLUB Debug output ----------------- diff --git a/MAINTAINERS b/MAINTAINERS index 1094edf0da1..4d1ba2022a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3193,6 +3193,7 @@ F: drivers/i2c/busses/i2c-stub.c I2C SUBSYSTEM M: "Jean Delvare (PC drivers, core)" <khali@linux-fr.org> M: "Ben Dooks (embedded platforms)" <ben-linux@fluff.org> +M: "Wolfram Sang (embedded platforms)" <w.sang@pengutronix.de> L: linux-i2c@vger.kernel.org W: http://i2c.wiki.kernel.org/ T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-i2c/ @@ -3774,7 +3775,6 @@ S: Odd Fixes KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: "J. Bruce Fields" <bfields@fieldses.org> -M: Neil Brown <neilb@suse.de> L: linux-nfs@vger.kernel.org W: http://nfs.sourceforge.net/ S: Supported @@ -4683,6 +4683,8 @@ Q: http://patchwork.kernel.org/project/linux-omap/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git S: Maintained F: arch/arm/*omap*/ +F: drivers/i2c/busses/i2c-omap.c +F: include/linux/i2c-omap.h OMAP CLOCK FRAMEWORK SUPPORT M: Paul Walmsley <paul@pwsan.com> @@ -5956,6 +5958,7 @@ L: davinci-linux-open-source@linux.davincidsp.com (subscribers-only) Q: http://patchwork.kernel.org/project/linux-davinci/list/ S: Supported F: arch/arm/mach-davinci +F: drivers/i2c/busses/i2c-davinci.c SIS 190 ETHERNET DRIVER M: Francois Romieu <romieu@fr.zoreil.com> diff --git a/arch/Kconfig b/arch/Kconfig index 2505740b81d..4f55c736be1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -185,4 +185,18 @@ config HAVE_RCU_TABLE_FREE config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config HAVE_ALIGNED_STRUCT_PAGE + bool + help + This makes sure that struct pages are double word aligned and that + e.g. the SLUB allocator can perform double word atomic operations + on a struct page for better performance. However selecting this + might increase the size of a struct page by a word. + +config HAVE_CMPXCHG_LOCAL + bool + +config HAVE_CMPXCHG_DOUBLE + bool + source "kernel/gcov/Kconfig" diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 6b93e200bca..5bc6b3837b2 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -202,6 +202,7 @@ static struct irda_platform_data assabet_irda_data = { static struct mcp_plat_data assabet_mcp_data = { .mccr0 = MCCR0_ADM, .sclk_rate = 11981000, + .codec = "ucb1x00", }; static void __init assabet_init(void) @@ -252,6 +253,17 @@ static void __init assabet_init(void) sa11x0_register_mtd(&assabet_flash_data, assabet_flash_resources, ARRAY_SIZE(assabet_flash_resources)); sa11x0_register_irda(&assabet_irda_data); + + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + + ASSABET_BCR_set(ASSABET_BCR_CODEC_RST); sa11x0_register_mcp(&assabet_mcp_data); } diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index 11bb6d0b9be..d12d0f48b1d 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -124,12 +124,23 @@ static void __init cerf_map_io(void) static struct mcp_plat_data cerf_mcp_data = { .mccr0 = MCCR0_ADM, .sclk_rate = 11981000, + .codec = "ucb1x00", }; static void __init cerf_init(void) { platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices)); sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1); + + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + sa11x0_register_mcp(&cerf_mcp_data); } diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index b9060e236de..c483912d08a 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -27,6 +27,7 @@ #include <linux/timer.h> #include <linux/gpio.h> #include <linux/pda_power.h> +#include <linux/mfd/ucb1x00.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -85,10 +86,15 @@ static struct scoop_pcmcia_config collie_pcmcia_config = { .num_devs = 1, }; +static struct ucb1x00_plat_data collie_ucb1x00_data = { + .gpio_base = COLLIE_TC35143_GPIO_BASE, +}; + static struct mcp_plat_data collie_mcp_data = { .mccr0 = MCCR0_ADM | MCCR0_ExtClk, .sclk_rate = 9216000, - .gpio_base = COLLIE_TC35143_GPIO_BASE, + .codec = "ucb1x00", + .codec_pdata = &collie_ucb1x00_data, }; /* @@ -351,6 +357,16 @@ static void __init collie_init(void) sa11x0_register_mtd(&collie_flash_data, collie_flash_resources, ARRAY_SIZE(collie_flash_resources)); + + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + sa11x0_register_mcp(&collie_mcp_data); sharpsl_save_param(); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 480d2ea46b0..e3a28ca2a7b 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -217,10 +217,15 @@ static struct platform_device sa11x0uart3_device = { static struct resource sa11x0mcp_resources[] = { [0] = { .start = __PREG(Ser4MCCR0), - .end = __PREG(Ser4MCCR0) + 0xffff, + .end = __PREG(Ser4MCCR0) + 0x1C - 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = __PREG(Ser4MCCR1), + .end = __PREG(Ser4MCCR1) + 0x4 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_Ser4MCP, .end = IRQ_Ser4MCP, .flags = IORESOURCE_IRQ, diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h index ed1a331508a..586cec898b3 100644 --- a/arch/arm/mach-sa1100/include/mach/mcp.h +++ b/arch/arm/mach-sa1100/include/mach/mcp.h @@ -17,6 +17,8 @@ struct mcp_plat_data { u32 mccr1; unsigned int sclk_rate; int gpio_base; + const char *codec; + void *codec_pdata; }; #endif diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index af4e2761f3d..d117ceab621 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -24,10 +24,20 @@ static struct mcp_plat_data lart_mcp_data = { .mccr0 = MCCR0_ADM, .sclk_rate = 11981000, + .codec = "ucb1x00", }; static void __init lart_init(void) { + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + sa11x0_register_mcp(&lart_mcp_data); } diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index 318b2b766a0..748d34435b3 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -55,11 +55,22 @@ static struct resource shannon_flash_resource = { static struct mcp_plat_data shannon_mcp_data = { .mccr0 = MCCR0_ADM, .sclk_rate = 11981000, + .codec = "ucb1x00", }; static void __init shannon_init(void) { sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1); + + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + sa11x0_register_mcp(&shannon_mcp_data); } diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index e17c04d6e32..458ececefa5 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -14,6 +14,7 @@ #include <linux/mtd/partitions.h> #include <linux/io.h> #include <linux/gpio.h> +#include <linux/mfd/ucb1x00.h> #include <asm/irq.h> #include <mach/hardware.h> @@ -187,10 +188,15 @@ static struct resource simpad_flash_resources [] = { } }; +static struct ucb1x00_plat_data simpad_ucb1x00_data = { + .gpio_base = SIMPAD_UCB1X00_GPIO_BASE, +}; + static struct mcp_plat_data simpad_mcp_data = { .mccr0 = MCCR0_ADM, .sclk_rate = 11981000, - .gpio_base = SIMPAD_UCB1X00_GPIO_BASE, + .codec = "ucb1300", + .codec_pdata = &simpad_ucb1x00_data, }; @@ -378,6 +384,16 @@ static int __init simpad_init(void) sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, ARRAY_SIZE(simpad_flash_resources)); + + /* + * Setup the PPC unit correctly. + */ + PPDR &= ~PPC_RXD4; + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PSDR |= PPC_RXD4; + PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + sa11x0_register_mcp(&simpad_mcp_data); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index 9361a529017..5c00712907d 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -19,11 +19,11 @@ #include <linux/amba/pl022.h> #include <linux/amba/serial.h> #include <linux/spi/spi.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/regulator/ab8500.h> #include <linux/mfd/tc3589x.h> #include <linux/mfd/tps6105x.h> -#include <linux/mfd/ab8500/gpio.h> +#include <linux/mfd/abx500/ab8500-gpio.h> #include <linux/leds-lp5521.h> #include <linux/input.h> #include <linux/smsc911x.h> diff --git a/arch/arm/mach-ux500/board-u5500.c b/arch/arm/mach-ux500/board-u5500.c index fe1569b67c9..9de9e9c4dbb 100644 --- a/arch/arm/mach-ux500/board-u5500.c +++ b/arch/arm/mach-ux500/board-u5500.c @@ -10,7 +10,7 @@ #include <linux/amba/bus.h> #include <linux/irq.h> #include <linux/i2c.h> -#include <linux/mfd/ab5500/ab5500.h> +#include <linux/mfd/abx500/ab5500.h> #include <asm/hardware/gic.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h index 47969909836..d2d4131435a 100644 --- a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h +++ b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h @@ -9,7 +9,7 @@ #define __MACH_IRQS_BOARD_MOP500_H /* Number of AB8500 irqs is taken from header file */ -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #define MOP500_AB8500_IRQ_BASE IRQ_BOARD_START #define MOP500_AB8500_IRQ_END (MOP500_AB8500_IRQ_BASE \ diff --git a/arch/arm/plat-samsung/include/plat/sdhci.h b/arch/arm/plat-samsung/include/plat/sdhci.h index 656dc00d30e..f82f888b91a 100644 --- a/arch/arm/plat-samsung/include/plat/sdhci.h +++ b/arch/arm/plat-samsung/include/plat/sdhci.h @@ -63,6 +63,7 @@ enum clk_types { struct s3c_sdhci_platdata { unsigned int max_width; unsigned int host_caps; + unsigned int pm_caps; enum cd_types cd_type; enum clk_types clk_type; diff --git a/arch/arm/plat-samsung/platformdata.c b/arch/arm/plat-samsung/platformdata.c index ceb9fa3a80c..0f707184eae 100644 --- a/arch/arm/plat-samsung/platformdata.c +++ b/arch/arm/plat-samsung/platformdata.c @@ -53,6 +53,8 @@ void s3c_sdhci_set_platdata(struct s3c_sdhci_platdata *pd, set->cfg_gpio = pd->cfg_gpio; if (pd->host_caps) set->host_caps |= pd->host_caps; + if (pd->pm_caps) + set->pm_caps |= pd->pm_caps; if (pd->clk_type) set->clk_type = pd->clk_type; } diff --git a/arch/avr32/include/asm/system.h b/arch/avr32/include/asm/system.h index 9702c2213e1..62d9ded0163 100644 --- a/arch/avr32/include/asm/system.h +++ b/arch/avr32/include/asm/system.h @@ -169,7 +169,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) struct pt_regs; -void NORET_TYPE die(const char *str, struct pt_regs *regs, long err); +void die(const char *str, struct pt_regs *regs, long err); void _exception(long signr, struct pt_regs *regs, int code, unsigned long addr); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index 7aa25756412..3d760c06f02 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -24,7 +24,7 @@ static DEFINE_SPINLOCK(die_lock); -void NORET_TYPE die(const char *str, struct pt_regs *regs, long err) +void die(const char *str, struct pt_regs *regs, long err) { static int die_counter; diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index d9f397fae03..691be0b95c1 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -309,7 +309,6 @@ struct thread_struct { } #define start_thread(regs,new_ip,new_sp) do { \ - set_fs(USER_DS); \ regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \ regs->cr_iip = new_ip; \ diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 7617248f0d1..7a3bd252494 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -323,11 +323,12 @@ #define __NR_sendmmsg 1331 #define __NR_process_vm_readv 1332 #define __NR_process_vm_writev 1333 +#define __NR_accept4 1334 #ifdef __KERNEL__ -#define NR_syscalls 310 /* length of syscall table */ +#define NR_syscalls 311 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 5b31d46aff6..1ccbe12a4d8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1779,6 +1779,7 @@ sys_call_table: data8 sys_sendmmsg data8 sys_process_vm_readv data8 sys_process_vm_writev + data8 sys_accept4 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 3d3aeef4694..4eed3581499 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -27,11 +27,11 @@ #include <asm/sal.h> #include <asm/mca.h> -typedef NORET_TYPE void (*relocate_new_kernel_t)( +typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, unsigned long start_address, struct ia64_boot_param *boot_param, - unsigned long pal_addr) ATTRIB_NORET; + unsigned long pal_addr) __noreturn; struct kimage *ia64_kimage; diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 82a4bb51d5d..b95a451b1c3 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -511,8 +511,7 @@ static unsigned long amiga_gettimeoffset(void) return ticks + offset; } -static NORET_TYPE void amiga_reset(void) - ATTRIB_NORET; +static void amiga_reset(void) __noreturn; static void amiga_reset(void) { diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index de39b1f343e..7b99c670e47 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child, extern asmlinkage void syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); -extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET; +extern void die(const char *, struct pt_regs *) __noreturn; static inline void die_if_kernel(const char *str, struct pt_regs *regs) { diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5c8a49d5505..bbddb86c1fa 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs) /* * NMI exception handler. */ -NORET_TYPE void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs) +void __noreturn nmi_exception_handler(struct pt_regs *regs) { bust_spinlocks(1); printk("NMI taken!!!!\n"); diff --git a/arch/mn10300/include/asm/exceptions.h b/arch/mn10300/include/asm/exceptions.h index ca3e20508c7..95a4d42c3a0 100644 --- a/arch/mn10300/include/asm/exceptions.h +++ b/arch/mn10300/include/asm/exceptions.h @@ -110,7 +110,7 @@ extern asmlinkage void nmi_handler(void); extern asmlinkage void misalignment(struct pt_regs *, enum exception_code); extern void die(const char *, struct pt_regs *, enum exception_code) - ATTRIB_NORET; + __noreturn; extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code); diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 9ce66e9d1c2..7213ec9e594 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -196,7 +196,6 @@ typedef unsigned int elf_caddr_t; /* offset pc for priv. level */ \ pc |= 3; \ \ - set_fs(USER_DS); \ regs->iasq[0] = spaceid; \ regs->iasq[1] = spaceid; \ regs->iaoq[0] = pc; \ @@ -299,7 +298,6 @@ on downward growing arches, it looks like this: elf_addr_t pc = (elf_addr_t)new_pc | 3; \ elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \ \ - set_fs(USER_DS); \ regs->iasq[0] = spaceid; \ regs->iasq[1] = spaceid; \ regs->iaoq[0] = pc; \ diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 4b4b9181a1a..62c60b87d03 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -192,7 +192,6 @@ void flush_thread(void) /* Only needs to handle fpu stuff or perf monitors. ** REVISIT: several arches implement a "lazy fpu state". */ - set_fs(USER_DS); } void release_thread(struct task_struct *dead_task) diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 7c66ce13da8..0a48bf5db6c 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -50,12 +50,6 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); -#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) -static const bool idle_uses_rcu = 1; -#else -static const bool idle_uses_rcu; -#endif - /* * The body of the idle task. */ @@ -67,8 +61,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { tick_nohz_idle_enter(); - if (!idle_uses_rcu) - rcu_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -106,8 +99,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (!idle_uses_rcu) - rcu_idle_exit(); + rcu_idle_exit(); tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index e63f2e7d2ef..affe5dcce7f 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -16,10 +16,10 @@ #include <asm/hw_irq.h> #include <asm/io.h> -typedef NORET_TYPE void (*relocate_new_kernel_t)( +typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, unsigned long reboot_code_buffer, - unsigned long start_address) ATTRIB_NORET; + unsigned long start_address) __noreturn; /* * This is a generic machine_kexec function suitable at least for diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 26ccbf77dd4..d7f609086a9 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -307,9 +307,9 @@ static union thread_union kexec_stack __init_task_data = struct paca_struct kexec_paca; /* Our assembly helper, in kexec_stub.S */ -extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, - void *image, void *control, - void (*clear_all)(void)) ATTRIB_NORET; +extern void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) __noreturn; /* too late to fail here */ void default_machine_kexec(struct kimage *image) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4ff3d8e411a..3feefc3842a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -58,7 +58,7 @@ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. * - * Note: node_to_cpumask() is not valid until after this is done. + * Note: cpumask_of_node() is not valid until after this is done. */ static void __init setup_node_to_cpumask_map(void) { diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index fd05fdee576..3ce73d0052b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -36,6 +36,7 @@ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ ld r12,hcall_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ cmpdi r12,0; \ beq+ 1f; \ mflr r0; \ @@ -74,7 +75,7 @@ END_FTR_SECTION(0, 1); \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,hcall_tracepoint_refcount@toc(r2); \ + ld r12,32(r1); \ cmpdi r12,0; \ beq+ 1f; \ mflr r0; \ diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 948e0e3b354..7bc73af6c7b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -546,6 +546,13 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) unsigned long flags; unsigned int *depth; + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + if (opcode == H_CEDE) + return; + local_irq_save(flags); depth = &__get_cpu_var(hcall_trace_depth); @@ -556,8 +563,6 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) (*depth)++; preempt_disable(); trace_hcall_entry(opcode, args); - if (opcode == H_CEDE) - rcu_idle_enter(); (*depth)--; out: @@ -570,6 +575,9 @@ void __trace_hcall_exit(long opcode, unsigned long retval, unsigned long flags; unsigned int *depth; + if (opcode == H_CEDE) + return; + local_irq_save(flags); depth = &__get_cpu_var(hcall_trace_depth); @@ -578,8 +586,6 @@ void __trace_hcall_exit(long opcode, unsigned long retval, goto out; (*depth)++; - if (opcode == H_CEDE) - rcu_idle_exit(); trace_hcall_exit(opcode, retval, retbuf); preempt_enable(); (*depth)--; diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 330a57b7c17..36f957f3184 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -638,7 +638,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, /* These are almost always orderly shutdowns. */ return; case KMSG_DUMP_OOPS: - case KMSG_DUMP_KEXEC: break; case KMSG_DUMP_PANIC: panicking = true; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 27272f6a14c..d25843a6a91 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -236,7 +236,7 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) /* * Function to drop a processor into disabled wait state */ -static inline void ATTRIB_NORET disabled_wait(unsigned long code) +static inline void __noreturn disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fab88431a06..0fd2e863e11 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -30,7 +30,7 @@ struct mcck_struct { static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static NORET_TYPE void s390_handle_damage(char *msg) +static void s390_handle_damage(char *msg) { smp_send_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index aaf6d59c201..7ec66517812 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -70,7 +70,7 @@ void show_regs(struct pt_regs * regs) /* * Create a kernel thread */ -ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *)) +__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *)) { do_exit(fn(arg)); } diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 210c1cabcb7..cbd4e4bb9fc 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -285,7 +285,7 @@ void show_regs(struct pt_regs *regs) /* * Create a kernel thread */ -ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *)) +__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *)) { do_exit(fn(arg)); } diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index e00d7179989..6255f2eab11 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -248,11 +248,11 @@ static void setup_quasi_va_is_pa(void) } -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { void *reboot_code_buffer; - NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long) - ATTRIB_NORET; + void (*rnk)(unsigned long, void *, unsigned long) + __noreturn; /* Mask all interrupts before starting to reboot. */ interrupt_mask_set_mask(~0ULL); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a150f4c35e9..6c14ecd851d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,9 @@ config X86 select PERF_EVENTS select HAVE_PERF_EVENTS_NMI select ANON_INODES + select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 + select HAVE_CMPXCHG_LOCAL if !M386 + select HAVE_CMPXCHG_DOUBLE select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index e3ca7e0d858..3c57033e221 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -309,12 +309,6 @@ config X86_INTERNODE_CACHE_SHIFT config X86_CMPXCHG def_bool X86_64 || (X86_32 && !M386) -config CMPXCHG_LOCAL - def_bool X86_64 || (X86_32 && !M386) - -config CMPXCHG_DOUBLE - def_bool y - config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f22a9f7f639..29ba3297e48 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2011,7 +2011,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) if (!mce_available(&boot_cpu_data)) return -EIO; - memset(&dev->kobj, 0, sizeof(struct kobject)); + memset(dev, 0, sizeof(struct device)); dev->id = cpu; dev->bus = &mce_subsys; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 020cd2e8087..19d3fa08b11 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu) * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. * - * Note: node_to_cpumask() is not valid until after this is done. + * Note: cpumask_of_node() is not valid until after this is done. * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ void __init setup_node_to_cpumask_map(void) diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 1d97bd84b6f..b2b54d2edf5 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -6,14 +6,6 @@ menu "UML-specific options" menu "Host processor type and features" -config CMPXCHG_LOCAL - bool - default n - -config CMPXCHG_DOUBLE - bool - default n - source "arch/x86/Kconfig.cpu" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 1b3142127bf..c07be024b96 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -97,7 +97,7 @@ obj-$(CONFIG_EISA) += eisa/ obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ -obj-$(CONFIG_MMC) += mmc/ +obj-y += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-y += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index fcbec8ac134..7be9f79018e 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -179,7 +179,7 @@ config GENERIC_CPU_DEVICES source "drivers/base/regmap/Kconfig" config DMA_SHARED_BUFFER - bool "Buffer framework to be shared between drivers" + bool default n select ANON_INODES depends on EXPERIMENTAL diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f17e3ea041c..ed5de58c340 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -295,11 +295,22 @@ static int memory_block_change_state(struct memory_block *mem, ret = memory_block_action(mem->start_section_nr, to_state); - if (ret) + if (ret) { mem->state = from_state_req; - else - mem->state = to_state; + goto out; + } + mem->state = to_state; + switch (mem->state) { + case MEM_OFFLINE: + kobject_uevent(&mem->dev.kobj, KOBJ_OFFLINE); + break; + case MEM_ONLINE: + kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE); + break; + default: + break; + } out: mutex_unlock(&mem->state_mutex); return ret; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 148ab944378..3fd31dec8c9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2184,6 +2184,8 @@ static ssize_t rbd_add(struct bus_type *bus, INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header.snap_rwsem); + /* generate unique id: find highest unique id, add one */ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index 7c7f42a1f88..9fec3232b73 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -83,8 +83,7 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper, struct timeval timestamp; if (reason != KMSG_DUMP_OOPS && - reason != KMSG_DUMP_PANIC && - reason != KMSG_DUMP_KEXEC) + reason != KMSG_DUMP_PANIC) return; /* Only dump oopses if dump_oops is set */ @@ -126,8 +125,8 @@ static int __init ramoops_probe(struct platform_device *pdev) goto fail3; } - rounddown_pow_of_two(pdata->mem_size); - rounddown_pow_of_two(pdata->record_size); + pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); + pdata->record_size = rounddown_pow_of_two(pdata->record_size); /* Check for the minimum memory size */ if (pdata->mem_size < MIN_MEM_SIZE && @@ -148,14 +147,6 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; - /* - * Update the module parameter variables as well so they are visible - * through /sys/module/ramoops/parameters/ - */ - mem_size = pdata->mem_size; - mem_address = pdata->mem_address; - record_size = pdata->record_size; - dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); @@ -176,6 +167,15 @@ static int __init ramoops_probe(struct platform_device *pdev) goto fail1; } + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + dump_oops = pdata->dump_oops; + return 0; fail1: diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 4c980b57332..87a68a896ab 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -65,7 +65,14 @@ static void stmpe_gpio_set(struct gpio_chip *chip, unsigned offset, int val) u8 reg = stmpe->regs[which] - (offset / 8); u8 mask = 1 << (offset % 8); - stmpe_reg_write(stmpe, reg, mask); + /* + * Some variants have single register for gpio set/clear functionality. + * For them we need to write 0 to clear and 1 to set. + */ + if (stmpe->regs[STMPE_IDX_GPSR_LSB] == stmpe->regs[STMPE_IDX_GPCR_LSB]) + stmpe_set_bits(stmpe, reg, mask, val ? mask : 0); + else + stmpe_reg_write(stmpe, reg, mask); } static int stmpe_gpio_direction_output(struct gpio_chip *chip, @@ -132,6 +139,10 @@ static int stmpe_gpio_irq_set_type(struct irq_data *d, unsigned int type) if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) return -EINVAL; + /* STMPE801 doesn't have RE and FE registers */ + if (stmpe_gpio->stmpe->partnum == STMPE801) + return 0; + if (type == IRQ_TYPE_EDGE_RISING) stmpe_gpio->regs[REG_RE][regoffset] |= mask; else @@ -165,6 +176,11 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) int i, j; for (i = 0; i < CACHE_NR_REGS; i++) { + /* STMPE801 doesn't have RE and FE registers */ + if ((stmpe->partnum == STMPE801) && + (i != REG_IE)) + continue; + for (j = 0; j < num_banks; j++) { u8 old = stmpe_gpio->oldregs[i][j]; u8 new = stmpe_gpio->regs[i][j]; @@ -241,8 +257,11 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev) } stmpe_reg_write(stmpe, statmsbreg + i, status[i]); - stmpe_reg_write(stmpe, stmpe->regs[STMPE_IDX_GPEDR_MSB] + i, - status[i]); + + /* Edge detect register is not present on 801 */ + if (stmpe->partnum != STMPE801) + stmpe_reg_write(stmpe, stmpe->regs[STMPE_IDX_GPEDR_MSB] + + i, status[i]); } return IRQ_HANDLED; diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c index 6d0f10b7569..c100f3e9c92 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_crt.c +++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c @@ -66,6 +66,7 @@ static void cdv_intel_crt_dpms(struct drm_encoder *encoder, int mode) static int cdv_intel_crt_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_psb_private *dev_priv = connector->dev->dev_private; int max_clock = 0; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; @@ -82,6 +83,11 @@ static int cdv_intel_crt_mode_valid(struct drm_connector *connector, if (mode->hdisplay > 1680 || mode->vdisplay > 1050) return MODE_PANEL; + /* We assume worst case scenario of 32 bpp here, since we don't know */ + if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) > + dev_priv->vram_stolen_size) + return MODE_MEM; + return MODE_OK; } diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c index 50d7cfb5166..de25560e629 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c +++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c @@ -241,6 +241,7 @@ static int cdv_hdmi_get_modes(struct drm_connector *connector) static int cdv_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_psb_private *dev_priv = connector->dev->dev_private; if (mode->clock > 165000) return MODE_CLOCK_HIGH; @@ -255,14 +256,11 @@ static int cdv_hdmi_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_INTERLACE) return MODE_NO_INTERLACE; - /* - * FIXME: for now we limit the size to 1680x1050 on CDV, otherwise it - * will go beyond the stolen memory size allocated to the framebuffer - */ - if (mode->hdisplay > 1680) - return MODE_PANEL; - if (mode->vdisplay > 1050) - return MODE_PANEL; + /* We assume worst case scenario of 32 bpp here, since we don't know */ + if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) > + dev_priv->vram_stolen_size) + return MODE_MEM; + return MODE_OK; } diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 36878a60080..025d30970cc 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -506,6 +506,7 @@ int oaktrail_crtc_hdmi_mode_set(struct drm_crtc *crtc, static int oaktrail_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_psb_private *dev_priv = connector->dev->dev_private; if (mode->clock > 165000) return MODE_CLOCK_HIGH; if (mode->clock < 20000) @@ -514,6 +515,11 @@ static int oaktrail_hdmi_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; + /* We assume worst case scenario of 32 bpp here, since we don't know */ + if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) > + dev_priv->vram_stolen_size) + return MODE_MEM; + return MODE_OK; } diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 4882b29119e..88b42971c0f 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -1141,6 +1141,7 @@ static void psb_intel_sdvo_dpms(struct drm_encoder *encoder, int mode) static int psb_intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_psb_private *dev_priv = connector->dev->dev_private; struct psb_intel_sdvo *psb_intel_sdvo = intel_attached_sdvo(connector); if (mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -1160,6 +1161,11 @@ static int psb_intel_sdvo_mode_valid(struct drm_connector *connector, return MODE_PANEL; } + /* We assume worst case scenario of 32 bpp here, since we don't know */ + if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) > + dev_priv->vram_stolen_size) + return MODE_MEM; + return MODE_OK; } diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 525744d593c..7814a760c16 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -18,12 +18,6 @@ #include <linux/vga_switcheroo.h> -#define NOUVEAU_DSM_SUPPORTED 0x00 -#define NOUVEAU_DSM_SUPPORTED_FUNCTIONS 0x00 - -#define NOUVEAU_DSM_ACTIVE 0x01 -#define NOUVEAU_DSM_ACTIVE_QUERY 0x00 - #define NOUVEAU_DSM_LED 0x02 #define NOUVEAU_DSM_LED_STATE 0x00 #define NOUVEAU_DSM_LED_OFF 0x10 @@ -35,6 +29,9 @@ #define NOUVEAU_DSM_POWER_SPEED 0x01 #define NOUVEAU_DSM_POWER_STAMINA 0x02 +#define NOUVEAU_DSM_OPTIMUS_FN 0x1A +#define NOUVEAU_DSM_OPTIMUS_ARGS 0x03000001 + static struct nouveau_dsm_priv { bool dsm_detected; bool optimus_detected; @@ -61,7 +58,8 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t * struct acpi_object_list input; union acpi_object params[4]; union acpi_object *obj; - int err; + int i, err; + char args_buff[4]; input.count = 4; input.pointer = params; @@ -73,7 +71,11 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t * params[2].type = ACPI_TYPE_INTEGER; params[2].integer.value = func; params[3].type = ACPI_TYPE_BUFFER; - params[3].buffer.length = 0; + params[3].buffer.length = 4; + /* ACPI is little endian, AABBCCDD becomes {DD,CC,BB,AA} */ + for (i = 0; i < 4; i++) + args_buff[i] = (arg >> i * 8) & 0xFF; + params[3].buffer.pointer = args_buff; err = acpi_evaluate_object(handle, "_DSM", &input, &output); if (err) { @@ -148,6 +150,23 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg, uint32_t *result) return 0; } +/* Returns 1 if a DSM function is usable and 0 otherwise */ +static int nouveau_test_dsm(acpi_handle test_handle, + int (*dsm_func)(acpi_handle, int, int, uint32_t *), + int sfnc) +{ + u32 result = 0; + + /* Function 0 returns a Buffer containing available functions. The args + * parameter is ignored for function 0, so just put 0 in it */ + if (dsm_func(test_handle, 0, 0, &result)) + return 0; + + /* ACPI Spec v4 9.14.1: if bit 0 is zero, no function is supported. If + * the n-th bit is enabled, function n is supported */ + return result & 1 && result & (1 << sfnc); +} + static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id) { mxm_wmi_call_mxmx(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0); @@ -168,6 +187,10 @@ static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switchero static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id) { + /* perhaps the _DSM functions are mutually exclusive, but prepare for + * the future */ + if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected) + return 0; if (id == VGA_SWITCHEROO_IGD) return nouveau_dsm_switch_mux(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_LED_STAMINA); else @@ -180,6 +203,11 @@ static int nouveau_dsm_power_state(enum vga_switcheroo_client_id id, if (id == VGA_SWITCHEROO_IGD) return 0; + /* Optimus laptops have the card already disabled in + * nouveau_switcheroo_set_state */ + if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected) + return 0; + return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dhandle, state); } @@ -212,8 +240,7 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev) { acpi_handle dhandle, nvidia_handle; acpi_status status; - int ret, retval = 0; - uint32_t result; + int retval = 0; dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); if (!dhandle) @@ -224,13 +251,11 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev) return false; } - ret = nouveau_dsm(dhandle, NOUVEAU_DSM_SUPPORTED, - NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result); - if (ret == 0) + if (nouveau_test_dsm(dhandle, nouveau_dsm, NOUVEAU_DSM_POWER)) retval |= NOUVEAU_DSM_HAS_MUX; - ret = nouveau_optimus_dsm(dhandle, 0, 0, &result); - if (ret == 0) + if (nouveau_test_dsm(dhandle, nouveau_optimus_dsm, + NOUVEAU_DSM_OPTIMUS_FN)) retval |= NOUVEAU_DSM_HAS_OPT; if (retval) @@ -269,15 +294,22 @@ static bool nouveau_dsm_detect(void) } if (vga_count == 2 && has_dsm && guid_valid) { - acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME, &buffer); + acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME, + &buffer); printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n", - acpi_method_name); + acpi_method_name); nouveau_dsm_priv.dsm_detected = true; ret = true; } - if (has_optimus == 1) + if (has_optimus == 1) { + acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME, + &buffer); + printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s handle\n", + acpi_method_name); nouveau_dsm_priv.optimus_detected = true; + ret = true; + } return ret; } @@ -293,6 +325,17 @@ void nouveau_register_dsm_handler(void) vga_switcheroo_register_handler(&nouveau_dsm_handler); } +/* Must be called for Optimus models before the card can be turned off */ +void nouveau_switcheroo_optimus_dsm(void) +{ + u32 result = 0; + if (!nouveau_dsm_priv.optimus_detected) + return; + + nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FN, + NOUVEAU_DSM_OPTIMUS_ARGS, &result); +} + void nouveau_unregister_dsm_handler(void) { vga_switcheroo_unregister_handler(); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 38134a9c757..b8270982893 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1055,12 +1055,14 @@ extern int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); #if defined(CONFIG_ACPI) void nouveau_register_dsm_handler(void); void nouveau_unregister_dsm_handler(void); +void nouveau_switcheroo_optimus_dsm(void); int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len); bool nouveau_acpi_rom_supported(struct pci_dev *pdev); int nouveau_acpi_edid(struct drm_device *, struct drm_connector *); #else static inline void nouveau_register_dsm_handler(void) {} static inline void nouveau_unregister_dsm_handler(void) {} +static inline void nouveau_switcheroo_optimus_dsm(void) {} static inline bool nouveau_acpi_rom_supported(struct pci_dev *pdev) { return false; } static inline int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len) { return -EINVAL; } static inline int nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) { return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index f5e98910d17..f80c5e0762f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -525,6 +525,7 @@ static void nouveau_switcheroo_set_state(struct pci_dev *pdev, printk(KERN_ERR "VGA switcheroo: switched nouveau off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(dev); + nouveau_switcheroo_optimus_dsm(); nouveau_pci_suspend(pdev, pmm); dev->switch_power_state = DRM_SWITCH_POWER_OFF; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index f7442e62c03..8e8cd85e5c0 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1793,10 +1793,12 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) ret = -EINVAL; break; case PACKET_TYPE2: + idx += 1; break; case PACKET_TYPE3: pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]); ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt); + idx += pkt.count + 2; break; default: dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type); @@ -1805,7 +1807,6 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) } if (ret) break; - idx += pkt.count + 2; } while (idx < ib->length_dw); return ret; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 3ec81c3d510..bfd36ab643a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2186,7 +2186,6 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) void r100_bm_disable(struct radeon_device *rdev) { u32 tmp; - u16 tmp16; /* disable bus mastering */ tmp = RREG32(R_000030_BUS_CNTL); @@ -2197,8 +2196,7 @@ void r100_bm_disable(struct radeon_device *rdev) WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); tmp = RREG32(RADEON_BUS_CNTL); mdelay(1); - pci_read_config_word(rdev->pdev, 0x4, &tmp16); - pci_write_config_word(rdev->pdev, 0x4, tmp16 & 0xFFFB); + pci_clear_master(rdev->pdev); mdelay(1); } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 31da622eef6..8032f1fedb1 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -145,7 +145,7 @@ module_param_named(vramlimit, radeon_vram_limit, int, 0600); MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)"); module_param_named(agpmode, radeon_agpmode, int, 0444); -MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32,64, etc)\n"); +MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc)"); module_param_named(gartsize, radeon_gart_size, int, 0600); MODULE_PARM_DESC(benchmark, "Run benchmark"); diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 803e0d3c177..ec46eb45e34 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -322,16 +322,6 @@ void rs600_hpd_fini(struct radeon_device *rdev) } } -void rs600_bm_disable(struct radeon_device *rdev) -{ - u16 tmp; - - /* disable bus mastering */ - pci_read_config_word(rdev->pdev, 0x4, &tmp); - pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB); - mdelay(1); -} - int rs600_asic_reset(struct radeon_device *rdev) { struct rv515_mc_save save; @@ -355,7 +345,8 @@ int rs600_asic_reset(struct radeon_device *rdev) WREG32(RADEON_CP_RB_CNTL, tmp); pci_save_state(rdev->pdev); /* disable bus mastering */ - rs600_bm_disable(rdev); + pci_clear_master(rdev->pdev); + mdelay(1); /* reset GA+VAP */ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_VAP(1) | S_0000F0_SOFT_RESET_GA(1)); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 37ead6995c8..0c46d8cdc6e 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -952,10 +952,9 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) type = ttm_to_type(ttm->page_flags, ttm->caching_state); pool = ttm_dma_find_pool(dev, type); - if (!pool) { - WARN_ON(!pool); + if (!pool) return; - } + is_cached = (ttm_dma_find_pool(pool->dev, ttm_to_type(ttm->page_flags, tt_cached)) == pool); diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index b6807db7b36..e66d248fc12 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -132,7 +132,8 @@ #define ALI1535_SMBIO_EN 0x04 /* SMB I/O Space enable */ static struct pci_driver ali1535_driver; -static unsigned short ali1535_smba; +static unsigned long ali1535_smba; +static unsigned short ali1535_offset; /* Detect whether a ALI1535 can be found, and initialize it, where necessary. Note the differences between kernels with the old PCI BIOS interface and @@ -140,7 +141,7 @@ static unsigned short ali1535_smba; defined to make the transition easier. */ static int __devinit ali1535_setup(struct pci_dev *dev) { - int retval = -ENODEV; + int retval; unsigned char temp; /* Check the following things: @@ -149,15 +150,28 @@ static int __devinit ali1535_setup(struct pci_dev *dev) - We can use the addresses */ + retval = pci_enable_device(dev); + if (retval) { + dev_err(&dev->dev, "ALI1535_smb can't enable device\n"); + goto exit; + } + /* Determine the address of the SMBus area */ - pci_read_config_word(dev, SMBBA, &ali1535_smba); - ali1535_smba &= (0xffff & ~(ALI1535_SMB_IOSIZE - 1)); - if (ali1535_smba == 0) { + pci_read_config_word(dev, SMBBA, &ali1535_offset); + dev_dbg(&dev->dev, "ALI1535_smb is at offset 0x%04x\n", ali1535_offset); + ali1535_offset &= (0xffff & ~(ALI1535_SMB_IOSIZE - 1)); + if (ali1535_offset == 0) { dev_warn(&dev->dev, "ALI1535_smb region uninitialized - upgrade BIOS?\n"); + retval = -ENODEV; goto exit; } + if (pci_resource_flags(dev, 0) & IORESOURCE_IO) + ali1535_smba = pci_resource_start(dev, 0) + ali1535_offset; + else + ali1535_smba = ali1535_offset; + retval = acpi_check_region(ali1535_smba, ALI1535_SMB_IOSIZE, ali1535_driver.name); if (retval) @@ -165,8 +179,9 @@ static int __devinit ali1535_setup(struct pci_dev *dev) if (!request_region(ali1535_smba, ALI1535_SMB_IOSIZE, ali1535_driver.name)) { - dev_err(&dev->dev, "ALI1535_smb region 0x%x already in use!\n", + dev_err(&dev->dev, "ALI1535_smb region 0x%lx already in use!\n", ali1535_smba); + retval = -EBUSY; goto exit; } @@ -174,6 +189,7 @@ static int __devinit ali1535_setup(struct pci_dev *dev) pci_read_config_byte(dev, SMBCFG, &temp); if ((temp & ALI1535_SMBIO_EN) == 0) { dev_err(&dev->dev, "SMB device not enabled - upgrade BIOS?\n"); + retval = -ENODEV; goto exit_free; } @@ -181,6 +197,7 @@ static int __devinit ali1535_setup(struct pci_dev *dev) pci_read_config_byte(dev, SMBHSTCFG, &temp); if ((temp & 1) == 0) { dev_err(&dev->dev, "SMBus controller not enabled - upgrade BIOS?\n"); + retval = -ENODEV; goto exit_free; } @@ -196,14 +213,13 @@ static int __devinit ali1535_setup(struct pci_dev *dev) */ pci_read_config_byte(dev, SMBREV, &temp); dev_dbg(&dev->dev, "SMBREV = 0x%X\n", temp); - dev_dbg(&dev->dev, "ALI1535_smba = 0x%X\n", ali1535_smba); + dev_dbg(&dev->dev, "ALI1535_smba = 0x%lx\n", ali1535_smba); - retval = 0; -exit: - return retval; + return 0; exit_free: release_region(ali1535_smba, ALI1535_SMB_IOSIZE); +exit: return retval; } @@ -498,7 +514,7 @@ static int __devinit ali1535_probe(struct pci_dev *dev, const struct pci_device_ ali1535_adapter.dev.parent = &dev->dev; snprintf(ali1535_adapter.name, sizeof(ali1535_adapter.name), - "SMBus ALI1535 adapter at %04x", ali1535_smba); + "SMBus ALI1535 adapter at %04x", ali1535_offset); return i2c_add_adapter(&ali1535_adapter); } diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index a409cfcf062..47ae0091e02 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -417,7 +417,7 @@ static void __devexit ali1563_remove(struct pci_dev * dev) ali1563_shutdown(dev); } -static const struct pci_device_id ali1563_id_table[] __devinitconst = { +static DEFINE_PCI_DEVICE_TABLE(ali1563_id_table) = { { PCI_DEVICE(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1563) }, {}, }; diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 83e8a60cdc8..087ea9caa74 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -477,7 +477,7 @@ static struct i2c_adapter ali15x3_adapter = { .algo = &smbus_algorithm, }; -static const struct pci_device_id ali15x3_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(ali15x3_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c index 03bcd07c469..eb778bf15c1 100644 --- a/drivers/i2c/busses/i2c-amd756.c +++ b/drivers/i2c/busses/i2c-amd756.c @@ -308,7 +308,7 @@ static const char* chipname[] = { "nVidia nForce", "AMD8111", }; -static const struct pci_device_id amd756_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(amd756_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B), .driver_data = AMD756 }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7413), diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 6b6a6b1d702..e5ac53b99b0 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -415,7 +415,7 @@ static const struct i2c_algorithm smbus_algorithm = { }; -static const struct pci_device_id amd8111_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(amd8111_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS2) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index 305c07504f7..1679deef9c8 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -295,9 +295,6 @@ static int at91_i2c_resume(struct platform_device *pdev) #define at91_i2c_resume NULL #endif -/* work with "modprobe at91_i2c" from hotplugging or coldplugging */ -MODULE_ALIAS("platform:at91_i2c"); - static struct platform_driver at91_i2c_driver = { .probe = at91_i2c_probe, .remove = __devexit_p(at91_i2c_remove), @@ -309,19 +306,9 @@ static struct platform_driver at91_i2c_driver = { }, }; -static int __init at91_i2c_init(void) -{ - return platform_driver_register(&at91_i2c_driver); -} - -static void __exit at91_i2c_exit(void) -{ - platform_driver_unregister(&at91_i2c_driver); -} - -module_init(at91_i2c_init); -module_exit(at91_i2c_exit); +module_platform_driver(at91_i2c_driver); MODULE_AUTHOR("Rick Bronson"); MODULE_DESCRIPTION("I2C (TWI) driver for Atmel AT91"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:at91_i2c"); diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c index f314d7f433d..582d616db34 100644 --- a/drivers/i2c/busses/i2c-au1550.c +++ b/drivers/i2c/busses/i2c-au1550.c @@ -426,20 +426,9 @@ static struct platform_driver au1xpsc_smbus_driver = { .remove = __devexit_p(i2c_au1550_remove), }; -static int __init i2c_au1550_init(void) -{ - return platform_driver_register(&au1xpsc_smbus_driver); -} - -static void __exit i2c_au1550_exit(void) -{ - platform_driver_unregister(&au1xpsc_smbus_driver); -} +module_platform_driver(au1xpsc_smbus_driver); MODULE_AUTHOR("Dan Malek, Embedded Edge, LLC."); MODULE_DESCRIPTION("SMBus adapter Alchemy pb1550"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:au1xpsc_smbus"); - -module_init (i2c_au1550_init); -module_exit (i2c_au1550_exit); diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index b1d9cd28d8d..c1e1096ba06 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -724,18 +724,7 @@ static struct platform_driver cpm_i2c_driver = { }, }; -static int __init cpm_i2c_init(void) -{ - return platform_driver_register(&cpm_i2c_driver); -} - -static void __exit cpm_i2c_exit(void) -{ - platform_driver_unregister(&cpm_i2c_driver); -} - -module_init(cpm_i2c_init); -module_exit(cpm_i2c_exit); +module_platform_driver(cpm_i2c_driver); MODULE_AUTHOR("Jochen Friedrich <jochen@scram.de>"); MODULE_DESCRIPTION("I2C-Bus adapter routines for CPM boards"); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 9e89e7313d6..37f42113af3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -349,7 +349,7 @@ static void __devexit i2c_dw_pci_remove(struct pci_dev *pdev) /* work with hotplug and coldplug */ MODULE_ALIAS("i2c_designware-pci"); -DEFINE_PCI_DEVICE_TABLE(i2_designware_pci_ids) = { +static DEFINE_PCI_DEVICE_TABLE(i2_designware_pci_ids) = { /* Moorestown */ { PCI_VDEVICE(INTEL, 0x0802), moorestown_0 }, { PCI_VDEVICE(INTEL, 0x0803), moorestown_1 }, diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 18936ac9d51..3ef3557b6e3 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -185,7 +185,7 @@ static DEFINE_MUTEX(pch_mutex); #define PCI_DEVICE_ID_ML7213_I2C 0x802D #define PCI_DEVICE_ID_ML7223_I2C 0x8010 -static struct pci_device_id __devinitdata pch_pcidev_id[] = { +static DEFINE_PCI_DEVICE_TABLE(pch_pcidev_id) = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_I2C), 1, }, { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_I2C), 2, }, { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_I2C), 1, }, diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c index fa88868cb55..19515df6102 100644 --- a/drivers/i2c/busses/i2c-highlander.c +++ b/drivers/i2c/busses/i2c-highlander.c @@ -468,18 +468,7 @@ static struct platform_driver highlander_i2c_driver = { .remove = __devexit_p(highlander_i2c_remove), }; -static int __init highlander_i2c_init(void) -{ - return platform_driver_register(&highlander_i2c_driver); -} - -static void __exit highlander_i2c_exit(void) -{ - platform_driver_unregister(&highlander_i2c_driver); -} - -module_init(highlander_i2c_init); -module_exit(highlander_i2c_exit); +module_platform_driver(highlander_i2c_driver); MODULE_AUTHOR("Paul Mundt"); MODULE_DESCRIPTION("Renesas Highlander FPGA I2C/SMBus adapter"); diff --git a/drivers/i2c/busses/i2c-hydra.c b/drivers/i2c/busses/i2c-hydra.c index 9ff1695d845..c527de17db4 100644 --- a/drivers/i2c/busses/i2c-hydra.c +++ b/drivers/i2c/busses/i2c-hydra.c @@ -105,7 +105,7 @@ static struct i2c_adapter hydra_adap = { .algo_data = &hydra_bit_data, }; -static const struct pci_device_id hydra_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(hydra_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_HYDRA) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ab26840d0c7..5d2e2816831 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -609,7 +609,7 @@ static const struct i2c_algorithm smbus_algorithm = { .functionality = i801_func, }; -static const struct pci_device_id i801_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(i801_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_3) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_3) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_2) }, diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index c08ceb957aa..806e225f3de 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -815,15 +815,4 @@ static struct platform_driver ibm_iic_driver = { .remove = __devexit_p(iic_remove), }; -static int __init iic_init(void) -{ - return platform_driver_register(&ibm_iic_driver); -} - -static void __exit iic_exit(void) -{ - platform_driver_unregister(&ibm_iic_driver); -} - -module_init(iic_init); -module_exit(iic_exit); +module_platform_driver(ibm_iic_driver); diff --git a/drivers/i2c/busses/i2c-intel-mid.c b/drivers/i2c/busses/i2c-intel-mid.c index e828ac85cfa..365bad5b890 100644 --- a/drivers/i2c/busses/i2c-intel-mid.c +++ b/drivers/i2c/busses/i2c-intel-mid.c @@ -1093,7 +1093,7 @@ static void __devexit intel_mid_i2c_remove(struct pci_dev *dev) pci_release_region(dev, 0); } -static struct pci_device_id intel_mid_i2c_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(intel_mid_i2c_ids) = { /* Moorestown */ { PCI_VDEVICE(INTEL, 0x0802), 0 }, { PCI_VDEVICE(INTEL, 0x0803), 1 }, diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index f09c9319a2b..93f147a96b6 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -523,21 +523,7 @@ static struct platform_driver iop3xx_i2c_driver = { }, }; -static int __init -i2c_iop3xx_init (void) -{ - return platform_driver_register(&iop3xx_i2c_driver); -} - -static void __exit -i2c_iop3xx_exit (void) -{ - platform_driver_unregister(&iop3xx_i2c_driver); - return; -} - -module_init (i2c_iop3xx_init); -module_exit (i2c_iop3xx_exit); +module_platform_driver(iop3xx_i2c_driver); MODULE_AUTHOR("D-TACQ Solutions Ltd <www.d-tacq.com>"); MODULE_DESCRIPTION("IOP3xx iic algorithm and driver"); diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c index 0682f8f277b..6561d275b8c 100644 --- a/drivers/i2c/busses/i2c-isch.c +++ b/drivers/i2c/busses/i2c-isch.c @@ -306,20 +306,9 @@ static struct platform_driver smbus_sch_driver = { .remove = __devexit_p(smbus_sch_remove), }; -static int __init i2c_sch_init(void) -{ - return platform_driver_register(&smbus_sch_driver); -} - -static void __exit i2c_sch_exit(void) -{ - platform_driver_unregister(&smbus_sch_driver); -} +module_platform_driver(smbus_sch_driver); MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>"); MODULE_DESCRIPTION("Intel SCH SMBus driver"); MODULE_LICENSE("GPL"); - -module_init(i2c_sch_init); -module_exit(i2c_sch_exit); MODULE_ALIAS("platform:isch_smbus"); diff --git a/drivers/i2c/busses/i2c-ixp2000.c b/drivers/i2c/busses/i2c-ixp2000.c index c01e9519f6c..5d263f9014d 100644 --- a/drivers/i2c/busses/i2c-ixp2000.c +++ b/drivers/i2c/busses/i2c-ixp2000.c @@ -148,18 +148,7 @@ static struct platform_driver ixp2000_i2c_driver = { }, }; -static int __init ixp2000_i2c_init(void) -{ - return platform_driver_register(&ixp2000_i2c_driver); -} - -static void __exit ixp2000_i2c_exit(void) -{ - platform_driver_unregister(&ixp2000_i2c_driver); -} - -module_init(ixp2000_i2c_init); -module_exit(ixp2000_i2c_exit); +module_platform_driver(ixp2000_i2c_driver); MODULE_AUTHOR ("Deepak Saxena <dsaxena@plexity.net>"); MODULE_DESCRIPTION("IXP2000 GPIO-based I2C bus driver"); diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 107397a606b..a8ebb84e23f 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -715,18 +715,7 @@ static struct platform_driver mpc_i2c_driver = { }, }; -static int __init fsl_i2c_init(void) -{ - return platform_driver_register(&mpc_i2c_driver); -} - -static void __exit fsl_i2c_exit(void) -{ - platform_driver_unregister(&mpc_i2c_driver); -} - -module_init(fsl_i2c_init); -module_exit(fsl_i2c_exit); +module_platform_driver(mpc_i2c_driver); MODULE_AUTHOR("Adrian Cox <adrian@humboldt.co.uk>"); MODULE_DESCRIPTION("I2C-Bus adapter for MPC107 bridge and " diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index a9941c65f22..4f44a33017b 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -611,20 +611,7 @@ static struct platform_driver mv64xxx_i2c_driver = { }, }; -static int __init -mv64xxx_i2c_init(void) -{ - return platform_driver_register(&mv64xxx_i2c_driver); -} - -static void __exit -mv64xxx_i2c_exit(void) -{ - platform_driver_unregister(&mv64xxx_i2c_driver); -} - -module_init(mv64xxx_i2c_init); -module_exit(mv64xxx_i2c_exit); +module_platform_driver(mv64xxx_i2c_driver); MODULE_AUTHOR("Mark A. Greer <mgreer@mvista.com>"); MODULE_DESCRIPTION("Marvell mv64xxx host bridge i2c ctlr driver"); diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index ff1e127dfea..43a96a12392 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ b/drivers/i2c/busses/i2c-nforce2.c @@ -309,7 +309,7 @@ static struct i2c_algorithm smbus_algorithm = { }; -static const struct pci_device_id nforce2_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(nforce2_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2S_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS) }, @@ -356,7 +356,7 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar, error = acpi_check_region(smbus->base, smbus->size, nforce2_driver.name); if (error) - return -1; + return error; if (!request_region(smbus->base, smbus->size, nforce2_driver.name)) { dev_err(&smbus->adapter.dev, "Error requesting region %02x .. %02X for %s\n", diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 1b46a9d9f90..18068dee48f 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -394,9 +394,6 @@ static struct of_device_id ocores_i2c_match[] = { }; MODULE_DEVICE_TABLE(of, ocores_i2c_match); -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:ocores-i2c"); - static struct platform_driver ocores_i2c_driver = { .probe = ocores_i2c_probe, .remove = __devexit_p(ocores_i2c_remove), @@ -409,19 +406,9 @@ static struct platform_driver ocores_i2c_driver = { }, }; -static int __init ocores_i2c_init(void) -{ - return platform_driver_register(&ocores_i2c_driver); -} - -static void __exit ocores_i2c_exit(void) -{ - platform_driver_unregister(&ocores_i2c_driver); -} - -module_init(ocores_i2c_init); -module_exit(ocores_i2c_exit); +module_platform_driver(ocores_i2c_driver); MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>"); MODULE_DESCRIPTION("OpenCores I2C bus driver"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ocores-i2c"); diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c index 56dbe54e881..ee139a59881 100644 --- a/drivers/i2c/busses/i2c-octeon.c +++ b/drivers/i2c/busses/i2c-octeon.c @@ -629,24 +629,10 @@ static struct platform_driver octeon_i2c_driver = { }, }; -static int __init octeon_i2c_init(void) -{ - int rv; - - rv = platform_driver_register(&octeon_i2c_driver); - return rv; -} - -static void __exit octeon_i2c_exit(void) -{ - platform_driver_unregister(&octeon_i2c_driver); -} +module_platform_driver(octeon_i2c_driver); MODULE_AUTHOR("Michael Lawnick <michael.lawnick.ext@nsn.com>"); MODULE_DESCRIPTION("I2C-Bus adapter for Cavium OCTEON processors"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); - -module_init(octeon_i2c_init); -module_exit(octeon_i2c_exit); diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 837b8c1aa02..eaaea73209c 100644 --- a/drivers/i2c/busses/i2c-pasemi.c +++ b/drivers/i2c/busses/i2c-pasemi.c @@ -401,7 +401,7 @@ static void __devexit pasemi_smb_remove(struct pci_dev *dev) kfree(smbus); } -static const struct pci_device_id pasemi_smb_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(pasemi_smb_ids) = { { PCI_DEVICE(0x1959, 0xa003) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c index ace67995d7d..2adbf1a8fde 100644 --- a/drivers/i2c/busses/i2c-pca-platform.c +++ b/drivers/i2c/busses/i2c-pca-platform.c @@ -286,20 +286,8 @@ static struct platform_driver i2c_pca_pf_driver = { }, }; -static int __init i2c_pca_pf_init(void) -{ - return platform_driver_register(&i2c_pca_pf_driver); -} - -static void __exit i2c_pca_pf_exit(void) -{ - platform_driver_unregister(&i2c_pca_pf_driver); -} +module_platform_driver(i2c_pca_pf_driver); MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>"); MODULE_DESCRIPTION("I2C-PCA9564/PCA9665 platform driver"); MODULE_LICENSE("GPL"); - -module_init(i2c_pca_pf_init); -module_exit(i2c_pca_pf_exit); - diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 6d14ac2e3c4..c14d48dd601 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -472,7 +472,7 @@ static struct i2c_adapter piix4_adapter = { .algo = &smbus_algorithm, }; -static const struct pci_device_id piix4_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(piix4_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_3) }, { PCI_DEVICE(PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_3) }, diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c index 127051b0692..07b7447ecbc 100644 --- a/drivers/i2c/busses/i2c-pmcmsp.c +++ b/drivers/i2c/busses/i2c-pmcmsp.c @@ -627,9 +627,6 @@ static struct i2c_adapter pmcmsptwi_adapter = { .name = DRV_NAME, }; -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:" DRV_NAME); - static struct platform_driver pmcmsptwi_driver = { .probe = pmcmsptwi_probe, .remove = __devexit_p(pmcmsptwi_remove), @@ -639,18 +636,8 @@ static struct platform_driver pmcmsptwi_driver = { }, }; -static int __init pmcmsptwi_init(void) -{ - return platform_driver_register(&pmcmsptwi_driver); -} - -static void __exit pmcmsptwi_exit(void) -{ - platform_driver_unregister(&pmcmsptwi_driver); -} +module_platform_driver(pmcmsptwi_driver); MODULE_DESCRIPTION("PMC MSP TWI/SMBus/I2C driver"); MODULE_LICENSE("GPL"); - -module_init(pmcmsptwi_init); -module_exit(pmcmsptwi_exit); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c index b289ec99eeb..7b397c6f607 100644 --- a/drivers/i2c/busses/i2c-powermac.c +++ b/drivers/i2c/busses/i2c-powermac.c @@ -312,10 +312,6 @@ static int __devinit i2c_powermac_probe(struct platform_device *dev) return rc; } - -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:i2c-powermac"); - static struct platform_driver i2c_powermac_driver = { .probe = i2c_powermac_probe, .remove = __devexit_p(i2c_powermac_remove), @@ -325,17 +321,6 @@ static struct platform_driver i2c_powermac_driver = { }, }; -static int __init i2c_powermac_init(void) -{ - platform_driver_register(&i2c_powermac_driver); - return 0; -} +module_platform_driver(i2c_powermac_driver); - -static void __exit i2c_powermac_cleanup(void) -{ - platform_driver_unregister(&i2c_powermac_driver); -} - -module_init(i2c_powermac_init); -module_exit(i2c_powermac_cleanup); +MODULE_ALIAS("platform:i2c-powermac"); diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c index 632e088760a..a0581798055 100644 --- a/drivers/i2c/busses/i2c-pxa-pci.c +++ b/drivers/i2c/busses/i2c-pxa-pci.c @@ -150,7 +150,7 @@ static void __devexit ce4100_i2c_remove(struct pci_dev *dev) kfree(sds); } -static struct pci_device_id ce4100_i2c_devices[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(ce4100_i2c_devices) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2e68)}, { }, }; diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index a67132b2e09..c0c9dffbdb1 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -560,18 +560,7 @@ static struct platform_driver sh7760_i2c_drv = { .remove = __devexit_p(sh7760_i2c_remove), }; -static int __init sh7760_i2c_init(void) -{ - return platform_driver_register(&sh7760_i2c_drv); -} - -static void __exit sh7760_i2c_exit(void) -{ - platform_driver_unregister(&sh7760_i2c_drv); -} - -module_init(sh7760_i2c_init); -module_exit(sh7760_i2c_exit); +module_platform_driver(sh7760_i2c_drv); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SH7760 I2C bus driver"); diff --git a/drivers/i2c/busses/i2c-simtec.c b/drivers/i2c/busses/i2c-simtec.c index 2fc08fbf67a..4fc87e7c94c 100644 --- a/drivers/i2c/busses/i2c-simtec.c +++ b/drivers/i2c/busses/i2c-simtec.c @@ -156,12 +156,8 @@ static int simtec_i2c_remove(struct platform_device *dev) return 0; } - /* device driver */ -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:simtec-i2c"); - static struct platform_driver simtec_i2c_driver = { .driver = { .name = "simtec-i2c", @@ -171,19 +167,9 @@ static struct platform_driver simtec_i2c_driver = { .remove = simtec_i2c_remove, }; -static int __init i2c_adap_simtec_init(void) -{ - return platform_driver_register(&simtec_i2c_driver); -} - -static void __exit i2c_adap_simtec_exit(void) -{ - platform_driver_unregister(&simtec_i2c_driver); -} - -module_init(i2c_adap_simtec_init); -module_exit(i2c_adap_simtec_exit); +module_platform_driver(simtec_i2c_driver); MODULE_DESCRIPTION("Simtec Generic I2C Bus driver"); MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:simtec-i2c"); diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c index 437586611d4..87e5126d449 100644 --- a/drivers/i2c/busses/i2c-sis5595.c +++ b/drivers/i2c/busses/i2c-sis5595.c @@ -147,7 +147,7 @@ static int __devinit sis5595_setup(struct pci_dev *SIS5595_dev) u16 a; u8 val; int *i; - int retval = -ENODEV; + int retval; /* Look for imposters */ for (i = blacklist; *i != 0; i++) { @@ -223,7 +223,7 @@ static int __devinit sis5595_setup(struct pci_dev *SIS5595_dev) error: release_region(sis5595_base + SMB_INDEX, 2); - return retval; + return -ENODEV; } static int sis5595_transaction(struct i2c_adapter *adap) @@ -369,7 +369,7 @@ static struct i2c_adapter sis5595_adapter = { .algo = &smbus_algorithm, }; -static const struct pci_device_id sis5595_ids[] __devinitconst = { +static DEFINE_PCI_DEVICE_TABLE(sis5595_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 58893772c3d..15cf78f65ce 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -393,7 +393,7 @@ static int __devinit sis630_setup(struct pci_dev *sis630_dev) { unsigned char b; struct pci_dev *dummy = NULL; - int retval = -ENODEV, i; + int retval, i; /* check for supported SiS devices */ for (i=0; supported[i] > 0 ; i++) { @@ -418,18 +418,21 @@ static int __devinit sis630_setup(struct pci_dev *sis630_dev) */ if (pci_read_config_byte(sis630_dev, SIS630_BIOS_CTL_REG,&b)) { dev_err(&sis630_dev->dev, "Error: Can't read bios ctl reg\n"); + retval = -ENODEV; goto exit; } /* if ACPI already enabled , do nothing */ if (!(b & 0x80) && pci_write_config_byte(sis630_dev, SIS630_BIOS_CTL_REG, b | 0x80)) { dev_err(&sis630_dev->dev, "Error: Can't enable ACPI\n"); + retval = -ENODEV; goto exit; } /* Determine the ACPI base address */ if (pci_read_config_word(sis630_dev,SIS630_ACPI_BASE_REG,&acpi_base)) { dev_err(&sis630_dev->dev, "Error: Can't determine ACPI base address\n"); + retval = -ENODEV; goto exit; } @@ -445,6 +448,7 @@ static int __devinit sis630_setup(struct pci_dev *sis630_dev) sis630_driver.name)) { dev_err(&sis630_dev->dev, "SMBus registers 0x%04x-0x%04x already " "in use!\n", acpi_base + SMB_STS, acpi_base + SMB_SAA); + retval = -EBUSY; goto exit; } @@ -468,7 +472,7 @@ static struct i2c_adapter sis630_adapter = { .algo = &smbus_algorithm, }; -static const struct pci_device_id sis630_ids[] __devinitconst = { +static DEFINE_PCI_DEVICE_TABLE(sis630_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC) }, { 0, } diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c index 86837f0c4cb..cc5d149413f 100644 --- a/drivers/i2c/busses/i2c-sis96x.c +++ b/drivers/i2c/busses/i2c-sis96x.c @@ -245,7 +245,7 @@ static struct i2c_adapter sis96x_adapter = { .algo = &smbus_algorithm, }; -static const struct pci_device_id sis96x_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(sis96x_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_SMBUS) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c index 7799fe5bda8..713d31ade26 100644 --- a/drivers/i2c/busses/i2c-via.c +++ b/drivers/i2c/busses/i2c-via.c @@ -89,7 +89,7 @@ static struct i2c_adapter vt586b_adapter = { }; -static const struct pci_device_id vt586b_ids[] __devinitconst = { +static DEFINE_PCI_DEVICE_TABLE(vt586b_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 2a62c998044..333011c83d5 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -324,7 +324,7 @@ static int __devinit vt596_probe(struct pci_dev *pdev, const struct pci_device_id *id) { unsigned char temp; - int error = -ENODEV; + int error; /* Determine the address of the SMBus areas */ if (force_addr) { @@ -390,6 +390,7 @@ found: dev_err(&pdev->dev, "SMBUS: Error: Host SMBus " "controller not enabled! - upgrade BIOS or " "use force=1\n"); + error = -ENODEV; goto release_region; } } @@ -422,9 +423,11 @@ found: "SMBus Via Pro adapter at %04x", vt596_smba); vt596_pdev = pci_dev_get(pdev); - if (i2c_add_adapter(&vt596_adapter)) { + error = i2c_add_adapter(&vt596_adapter); + if (error) { pci_dev_put(vt596_pdev); vt596_pdev = NULL; + goto release_region; } /* Always return failure here. This is to allow other drivers to bind @@ -438,7 +441,7 @@ release_region: return error; } -static const struct pci_device_id vt596_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(vt596_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596_3), .driver_data = SMBBA1 }, { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596B_3), diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index ac083a28ae0..2bded7647ef 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -795,10 +795,6 @@ static int __devexit xiic_i2c_remove(struct platform_device* pdev) return 0; } - -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:"DRIVER_NAME); - static struct platform_driver xiic_i2c_driver = { .probe = xiic_i2c_probe, .remove = __devexit_p(xiic_i2c_remove), @@ -808,19 +804,9 @@ static struct platform_driver xiic_i2c_driver = { }, }; -static int __init xiic_i2c_init(void) -{ - return platform_driver_register(&xiic_i2c_driver); -} - -static void __exit xiic_i2c_exit(void) -{ - platform_driver_unregister(&xiic_i2c_driver); -} - -module_init(xiic_i2c_init); -module_exit(xiic_i2c_exit); +module_platform_driver(xiic_i2c_driver); MODULE_AUTHOR("info@mocean-labs.com"); MODULE_DESCRIPTION("Xilinx I2C bus driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:"DRIVER_NAME); diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index 91e349c884c..2eacb7784d5 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -559,7 +559,7 @@ static struct platform_driver scx200_pci_driver = { .remove = __devexit_p(scx200_remove), }; -static const struct pci_device_id scx200_isa[] __initconst = { +static DEFINE_PCI_DEVICE_TABLE(scx200_isa) = { { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, { 0, } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 57a45ce84b2..10e7f1e7658 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -251,15 +251,10 @@ static noinline int i2cdev_ioctl_rdrw(struct i2c_client *client, if (rdwr_arg.nmsgs > I2C_RDRW_IOCTL_MAX_MSGS) return -EINVAL; - rdwr_pa = kmalloc(rdwr_arg.nmsgs * sizeof(struct i2c_msg), GFP_KERNEL); - if (!rdwr_pa) - return -ENOMEM; - - if (copy_from_user(rdwr_pa, rdwr_arg.msgs, - rdwr_arg.nmsgs * sizeof(struct i2c_msg))) { - kfree(rdwr_pa); - return -EFAULT; - } + rdwr_pa = memdup_user(rdwr_arg.msgs, + rdwr_arg.nmsgs * sizeof(struct i2c_msg)); + if (IS_ERR(rdwr_pa)) + return PTR_ERR(rdwr_pa); data_ptrs = kmalloc(rdwr_arg.nmsgs * sizeof(u8 __user *), GFP_KERNEL); if (data_ptrs == NULL) { diff --git a/drivers/i2c/muxes/gpio-i2cmux.c b/drivers/i2c/muxes/gpio-i2cmux.c index 7b6ce624cd6..e5fa695eb0f 100644 --- a/drivers/i2c/muxes/gpio-i2cmux.c +++ b/drivers/i2c/muxes/gpio-i2cmux.c @@ -165,18 +165,7 @@ static struct platform_driver gpiomux_driver = { }, }; -static int __init gpiomux_init(void) -{ - return platform_driver_register(&gpiomux_driver); -} - -static void __exit gpiomux_exit(void) -{ - platform_driver_unregister(&gpiomux_driver); -} - -module_init(gpiomux_init); -module_exit(gpiomux_exit); +module_platform_driver(gpiomux_driver); MODULE_DESCRIPTION("GPIO-based I2C multiplexer driver"); MODULE_AUTHOR("Peter Korsgaard <peter.korsgaard@barco.com>"); diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c index 79d90163363..350fd0c385d 100644 --- a/drivers/input/misc/ab8500-ponkey.c +++ b/drivers/input/misc/ab8500-ponkey.c @@ -12,7 +12,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/interrupt.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/slab.h> /** diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig index 9c6650ea848..2302fbe70ac 100644 --- a/drivers/isdn/i4l/Kconfig +++ b/drivers/isdn/i4l/Kconfig @@ -6,7 +6,7 @@ if ISDN_I4L config ISDN_PPP bool "Support synchronous PPP" - depends on INET + depends on INET && NETDEVICES select SLHC help Over digital connections such as ISDN, there is no need to diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 897a77dfa9d..c957c344233 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -396,6 +396,13 @@ config LEDS_TCA6507 LED driver chips accessed via the I2C bus. Driver support brightness control and hardware-assisted blinking. +config LEDS_MAX8997 + tristate "LED support for MAX8997 PMIC" + depends on LEDS_CLASS && MFD_MAX8997 + help + This option enables support for on-chip LED drivers on + MAXIM MAX8997 PMIC. + config LEDS_TRIGGERS bool "LED Trigger support" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 5c9dc4b000d..b8a9723477f 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_LEDS_NS2) += leds-ns2.o obj-$(CONFIG_LEDS_NETXBIG) += leds-netxbig.o obj-$(CONFIG_LEDS_ASIC3) += leds-asic3.o obj-$(CONFIG_LEDS_RENESAS_TPU) += leds-renesas-tpu.o +obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c new file mode 100644 index 00000000000..f4c0e37fad1 --- /dev/null +++ b/drivers/leds/leds-max8997.c @@ -0,0 +1,372 @@ +/* + * leds-max8997.c - LED class driver for MAX8997 LEDs. + * + * Copyright (C) 2011 Samsung Electronics + * Donggeun Kim <dg77.kim@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include <linux/leds.h> +#include <linux/mfd/max8997.h> +#include <linux/mfd/max8997-private.h> +#include <linux/platform_device.h> + +#define MAX8997_LED_FLASH_SHIFT 3 +#define MAX8997_LED_FLASH_CUR_MASK 0xf8 +#define MAX8997_LED_MOVIE_SHIFT 4 +#define MAX8997_LED_MOVIE_CUR_MASK 0xf0 + +#define MAX8997_LED_FLASH_MAX_BRIGHTNESS 0x1f +#define MAX8997_LED_MOVIE_MAX_BRIGHTNESS 0xf +#define MAX8997_LED_NONE_MAX_BRIGHTNESS 0 + +#define MAX8997_LED0_FLASH_MASK 0x1 +#define MAX8997_LED0_FLASH_PIN_MASK 0x5 +#define MAX8997_LED0_MOVIE_MASK 0x8 +#define MAX8997_LED0_MOVIE_PIN_MASK 0x28 + +#define MAX8997_LED1_FLASH_MASK 0x2 +#define MAX8997_LED1_FLASH_PIN_MASK 0x6 +#define MAX8997_LED1_MOVIE_MASK 0x10 +#define MAX8997_LED1_MOVIE_PIN_MASK 0x30 + +#define MAX8997_LED_BOOST_ENABLE_MASK (1 << 6) + +struct max8997_led { + struct max8997_dev *iodev; + struct led_classdev cdev; + bool enabled; + int id; + enum max8997_led_mode led_mode; + struct mutex mutex; +}; + +static void max8997_led_clear_mode(struct max8997_led *led, + enum max8997_led_mode mode) +{ + struct i2c_client *client = led->iodev->i2c; + u8 val = 0, mask = 0; + int ret; + + switch (mode) { + case MAX8997_FLASH_MODE: + mask = led->id ? + MAX8997_LED1_FLASH_MASK : MAX8997_LED0_FLASH_MASK; + break; + case MAX8997_MOVIE_MODE: + mask = led->id ? + MAX8997_LED1_MOVIE_MASK : MAX8997_LED0_MOVIE_MASK; + break; + case MAX8997_FLASH_PIN_CONTROL_MODE: + mask = led->id ? + MAX8997_LED1_FLASH_PIN_MASK : MAX8997_LED0_FLASH_PIN_MASK; + break; + case MAX8997_MOVIE_PIN_CONTROL_MODE: + mask = led->id ? + MAX8997_LED1_MOVIE_PIN_MASK : MAX8997_LED0_MOVIE_PIN_MASK; + break; + default: + break; + } + + if (mask) { + ret = max8997_update_reg(client, + MAX8997_REG_LEN_CNTL, val, mask); + if (ret) + dev_err(led->iodev->dev, + "failed to update register(%d)\n", ret); + } +} + +static void max8997_led_set_mode(struct max8997_led *led, + enum max8997_led_mode mode) +{ + int ret; + struct i2c_client *client = led->iodev->i2c; + u8 mask = 0; + + /* First, clear the previous mode */ + max8997_led_clear_mode(led, led->led_mode); + + switch (mode) { + case MAX8997_FLASH_MODE: + mask = led->id ? + MAX8997_LED1_FLASH_MASK : MAX8997_LED0_FLASH_MASK; + led->cdev.max_brightness = MAX8997_LED_FLASH_MAX_BRIGHTNESS; + break; + case MAX8997_MOVIE_MODE: + mask = led->id ? + MAX8997_LED1_MOVIE_MASK : MAX8997_LED0_MOVIE_MASK; + led->cdev.max_brightness = MAX8997_LED_MOVIE_MAX_BRIGHTNESS; + break; + case MAX8997_FLASH_PIN_CONTROL_MODE: + mask = led->id ? + MAX8997_LED1_FLASH_PIN_MASK : MAX8997_LED0_FLASH_PIN_MASK; + led->cdev.max_brightness = MAX8997_LED_FLASH_MAX_BRIGHTNESS; + break; + case MAX8997_MOVIE_PIN_CONTROL_MODE: + mask = led->id ? + MAX8997_LED1_MOVIE_PIN_MASK : MAX8997_LED0_MOVIE_PIN_MASK; + led->cdev.max_brightness = MAX8997_LED_MOVIE_MAX_BRIGHTNESS; + break; + default: + led->cdev.max_brightness = MAX8997_LED_NONE_MAX_BRIGHTNESS; + break; + } + + if (mask) { + ret = max8997_update_reg(client, + MAX8997_REG_LEN_CNTL, mask, mask); + if (ret) + dev_err(led->iodev->dev, + "failed to update register(%d)\n", ret); + } + + led->led_mode = mode; +} + +static void max8997_led_enable(struct max8997_led *led, bool enable) +{ + int ret; + struct i2c_client *client = led->iodev->i2c; + u8 val = 0, mask = MAX8997_LED_BOOST_ENABLE_MASK; + + if (led->enabled == enable) + return; + + val = enable ? MAX8997_LED_BOOST_ENABLE_MASK : 0; + + ret = max8997_update_reg(client, MAX8997_REG_BOOST_CNTL, val, mask); + if (ret) + dev_err(led->iodev->dev, + "failed to update register(%d)\n", ret); + + led->enabled = enable; +} + +static void max8997_led_set_current(struct max8997_led *led, + enum led_brightness value) +{ + int ret; + struct i2c_client *client = led->iodev->i2c; + u8 val = 0, mask = 0, reg = 0; + + switch (led->led_mode) { + case MAX8997_FLASH_MODE: + case MAX8997_FLASH_PIN_CONTROL_MODE: + val = value << MAX8997_LED_FLASH_SHIFT; + mask = MAX8997_LED_FLASH_CUR_MASK; + reg = led->id ? MAX8997_REG_FLASH2_CUR : MAX8997_REG_FLASH1_CUR; + break; + case MAX8997_MOVIE_MODE: + case MAX8997_MOVIE_PIN_CONTROL_MODE: + val = value << MAX8997_LED_MOVIE_SHIFT; + mask = MAX8997_LED_MOVIE_CUR_MASK; + reg = MAX8997_REG_MOVIE_CUR; + break; + default: + break; + } + + if (mask) { + ret = max8997_update_reg(client, reg, val, mask); + if (ret) + dev_err(led->iodev->dev, + "failed to update register(%d)\n", ret); + } +} + +static void max8997_led_brightness_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct max8997_led *led = + container_of(led_cdev, struct max8997_led, cdev); + + if (value) { + max8997_led_set_current(led, value); + max8997_led_enable(led, true); + } else { + max8997_led_set_current(led, value); + max8997_led_enable(led, false); + } +} + +static ssize_t max8997_led_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct max8997_led *led = + container_of(led_cdev, struct max8997_led, cdev); + ssize_t ret = 0; + + mutex_lock(&led->mutex); + + switch (led->led_mode) { + case MAX8997_FLASH_MODE: + ret += sprintf(buf, "FLASH\n"); + break; + case MAX8997_MOVIE_MODE: + ret += sprintf(buf, "MOVIE\n"); + break; + case MAX8997_FLASH_PIN_CONTROL_MODE: + ret += sprintf(buf, "FLASH_PIN_CONTROL\n"); + break; + case MAX8997_MOVIE_PIN_CONTROL_MODE: + ret += sprintf(buf, "MOVIE_PIN_CONTROL\n"); + break; + default: + ret += sprintf(buf, "NONE\n"); + break; + } + + mutex_unlock(&led->mutex); + + return ret; +} + +static ssize_t max8997_led_store_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct max8997_led *led = + container_of(led_cdev, struct max8997_led, cdev); + enum max8997_led_mode mode; + + mutex_lock(&led->mutex); + + if (!strncmp(buf, "FLASH_PIN_CONTROL", 17)) + mode = MAX8997_FLASH_PIN_CONTROL_MODE; + else if (!strncmp(buf, "MOVIE_PIN_CONTROL", 17)) + mode = MAX8997_MOVIE_PIN_CONTROL_MODE; + else if (!strncmp(buf, "FLASH", 5)) + mode = MAX8997_FLASH_MODE; + else if (!strncmp(buf, "MOVIE", 5)) + mode = MAX8997_MOVIE_MODE; + else + mode = MAX8997_NONE; + + max8997_led_set_mode(led, mode); + + mutex_unlock(&led->mutex); + + return size; +} + +static DEVICE_ATTR(mode, 0644, max8997_led_show_mode, max8997_led_store_mode); + +static int __devinit max8997_led_probe(struct platform_device *pdev) +{ + struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent); + struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev); + struct max8997_led *led; + char name[20]; + int ret = 0; + + if (pdata == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + return -ENODEV; + } + + led = kzalloc(sizeof(*led), GFP_KERNEL); + if (led == NULL) { + ret = -ENOMEM; + goto err_mem; + } + + led->id = pdev->id; + snprintf(name, sizeof(name), "max8997-led%d", pdev->id); + + led->cdev.name = name; + led->cdev.brightness_set = max8997_led_brightness_set; + led->cdev.flags |= LED_CORE_SUSPENDRESUME; + led->cdev.brightness = 0; + led->iodev = iodev; + + /* initialize mode and brightness according to platform_data */ + if (pdata->led_pdata) { + u8 mode = 0, brightness = 0; + + mode = pdata->led_pdata->mode[led->id]; + brightness = pdata->led_pdata->brightness[led->id]; + + max8997_led_set_mode(led, pdata->led_pdata->mode[led->id]); + + if (brightness > led->cdev.max_brightness) + brightness = led->cdev.max_brightness; + max8997_led_set_current(led, brightness); + led->cdev.brightness = brightness; + } else { + max8997_led_set_mode(led, MAX8997_NONE); + max8997_led_set_current(led, 0); + } + + mutex_init(&led->mutex); + + platform_set_drvdata(pdev, led); + + ret = led_classdev_register(&pdev->dev, &led->cdev); + if (ret < 0) + goto err_led; + + ret = device_create_file(led->cdev.dev, &dev_attr_mode); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to create file: %d\n", ret); + goto err_file; + } + + return 0; + +err_file: + led_classdev_unregister(&led->cdev); +err_led: + kfree(led); +err_mem: + return ret; +} + +static int __devexit max8997_led_remove(struct platform_device *pdev) +{ + struct max8997_led *led = platform_get_drvdata(pdev); + + device_remove_file(led->cdev.dev, &dev_attr_mode); + led_classdev_unregister(&led->cdev); + kfree(led); + + return 0; +} + +static struct platform_driver max8997_led_driver = { + .driver = { + .name = "max8997-led", + .owner = THIS_MODULE, + }, + .probe = max8997_led_probe, + .remove = __devexit_p(max8997_led_remove), +}; + +static int __init max8997_led_init(void) +{ + return platform_driver_register(&max8997_led_driver); +} +module_init(max8997_led_init); + +static void __exit max8997_led_exit(void) +{ + platform_driver_unregister(&max8997_led_driver); +} +module_exit(max8997_led_exit); + +MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); +MODULE_DESCRIPTION("MAX8997 LED driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:max8997-led"); diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c index e017dc88622..f93dd9571c3 100644 --- a/drivers/mfd/88pm860x-i2c.c +++ b/drivers/mfd/88pm860x-i2c.c @@ -12,51 +12,20 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/i2c.h> +#include <linux/err.h> +#include <linux/regmap.h> #include <linux/mfd/88pm860x.h> #include <linux/slab.h> -static inline int pm860x_read_device(struct i2c_client *i2c, - int reg, int bytes, void *dest) -{ - unsigned char data; - int ret; - - data = (unsigned char)reg; - ret = i2c_master_send(i2c, &data, 1); - if (ret < 0) - return ret; - - ret = i2c_master_recv(i2c, dest, bytes); - if (ret < 0) - return ret; - return 0; -} - -static inline int pm860x_write_device(struct i2c_client *i2c, - int reg, int bytes, void *src) -{ - unsigned char buf[bytes + 1]; - int ret; - - buf[0] = (unsigned char)reg; - memcpy(&buf[1], src, bytes); - - ret = i2c_master_send(i2c, buf, bytes + 1); - if (ret < 0) - return ret; - return 0; -} - int pm860x_reg_read(struct i2c_client *i2c, int reg) { struct pm860x_chip *chip = i2c_get_clientdata(i2c); - unsigned char data; + struct regmap *map = (i2c == chip->client) ? chip->regmap + : chip->regmap_companion; + unsigned int data; int ret; - mutex_lock(&chip->io_lock); - ret = pm860x_read_device(i2c, reg, 1, &data); - mutex_unlock(&chip->io_lock); - + ret = regmap_read(map, reg, &data); if (ret < 0) return ret; else @@ -68,12 +37,11 @@ int pm860x_reg_write(struct i2c_client *i2c, int reg, unsigned char data) { struct pm860x_chip *chip = i2c_get_clientdata(i2c); + struct regmap *map = (i2c == chip->client) ? chip->regmap + : chip->regmap_companion; int ret; - mutex_lock(&chip->io_lock); - ret = pm860x_write_device(i2c, reg, 1, &data); - mutex_unlock(&chip->io_lock); - + ret = regmap_write(map, reg, data); return ret; } EXPORT_SYMBOL(pm860x_reg_write); @@ -82,12 +50,11 @@ int pm860x_bulk_read(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { struct pm860x_chip *chip = i2c_get_clientdata(i2c); + struct regmap *map = (i2c == chip->client) ? chip->regmap + : chip->regmap_companion; int ret; - mutex_lock(&chip->io_lock); - ret = pm860x_read_device(i2c, reg, count, buf); - mutex_unlock(&chip->io_lock); - + ret = regmap_raw_read(map, reg, buf, count); return ret; } EXPORT_SYMBOL(pm860x_bulk_read); @@ -96,12 +63,11 @@ int pm860x_bulk_write(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { struct pm860x_chip *chip = i2c_get_clientdata(i2c); + struct regmap *map = (i2c == chip->client) ? chip->regmap + : chip->regmap_companion; int ret; - mutex_lock(&chip->io_lock); - ret = pm860x_write_device(i2c, reg, count, buf); - mutex_unlock(&chip->io_lock); - + ret = regmap_raw_write(map, reg, buf, count); return ret; } EXPORT_SYMBOL(pm860x_bulk_write); @@ -110,39 +76,78 @@ int pm860x_set_bits(struct i2c_client *i2c, int reg, unsigned char mask, unsigned char data) { struct pm860x_chip *chip = i2c_get_clientdata(i2c); - unsigned char value; + struct regmap *map = (i2c == chip->client) ? chip->regmap + : chip->regmap_companion; int ret; - mutex_lock(&chip->io_lock); - ret = pm860x_read_device(i2c, reg, 1, &value); - if (ret < 0) - goto out; - value &= ~mask; - value |= data; - ret = pm860x_write_device(i2c, reg, 1, &value); -out: - mutex_unlock(&chip->io_lock); + ret = regmap_update_bits(map, reg, mask, data); return ret; } EXPORT_SYMBOL(pm860x_set_bits); +static int read_device(struct i2c_client *i2c, int reg, + int bytes, void *dest) +{ + unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX + 3]; + unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX + 2]; + struct i2c_adapter *adap = i2c->adapter; + struct i2c_msg msg[2] = {{i2c->addr, 0, 1, msgbuf0}, + {i2c->addr, I2C_M_RD, 0, msgbuf1}, + }; + int num = 1, ret = 0; + + if (dest == NULL) + return -EINVAL; + msgbuf0[0] = (unsigned char)reg; /* command */ + msg[1].len = bytes; + + /* if data needs to read back, num should be 2 */ + if (bytes > 0) + num = 2; + ret = adap->algo->master_xfer(adap, msg, num); + memcpy(dest, msgbuf1, bytes); + if (ret < 0) + return ret; + return 0; +} + +static int write_device(struct i2c_client *i2c, int reg, + int bytes, void *src) +{ + unsigned char buf[bytes + 1]; + struct i2c_adapter *adap = i2c->adapter; + struct i2c_msg msg; + int ret; + + buf[0] = (unsigned char)reg; + memcpy(&buf[1], src, bytes); + msg.addr = i2c->addr; + msg.flags = 0; + msg.len = bytes + 1; + msg.buf = buf; + + ret = adap->algo->master_xfer(adap, &msg, 1); + if (ret < 0) + return ret; + return 0; +} + int pm860x_page_reg_read(struct i2c_client *i2c, int reg) { - struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char zero = 0; unsigned char data; int ret; - mutex_lock(&chip->io_lock); - pm860x_write_device(i2c, 0xFA, 0, &zero); - pm860x_write_device(i2c, 0xFB, 0, &zero); - pm860x_write_device(i2c, 0xFF, 0, &zero); - ret = pm860x_read_device(i2c, reg, 1, &data); + i2c_lock_adapter(i2c->adapter); + read_device(i2c, 0xFA, 0, &zero); + read_device(i2c, 0xFB, 0, &zero); + read_device(i2c, 0xFF, 0, &zero); + ret = read_device(i2c, reg, 1, &data); if (ret >= 0) ret = (int)data; - pm860x_write_device(i2c, 0xFE, 0, &zero); - pm860x_write_device(i2c, 0xFC, 0, &zero); - mutex_unlock(&chip->io_lock); + read_device(i2c, 0xFE, 0, &zero); + read_device(i2c, 0xFC, 0, &zero); + i2c_unlock_adapter(i2c->adapter); return ret; } EXPORT_SYMBOL(pm860x_page_reg_read); @@ -150,18 +155,17 @@ EXPORT_SYMBOL(pm860x_page_reg_read); int pm860x_page_reg_write(struct i2c_client *i2c, int reg, unsigned char data) { - struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char zero; int ret; - mutex_lock(&chip->io_lock); - pm860x_write_device(i2c, 0xFA, 0, &zero); - pm860x_write_device(i2c, 0xFB, 0, &zero); - pm860x_write_device(i2c, 0xFF, 0, &zero); - ret = pm860x_write_device(i2c, reg, 1, &data); - pm860x_write_device(i2c, 0xFE, 0, &zero); - pm860x_write_device(i2c, 0xFC, 0, &zero); - mutex_unlock(&chip->io_lock); + i2c_lock_adapter(i2c->adapter); + read_device(i2c, 0xFA, 0, &zero); + read_device(i2c, 0xFB, 0, &zero); + read_device(i2c, 0xFF, 0, &zero); + ret = write_device(i2c, reg, 1, &data); + read_device(i2c, 0xFE, 0, &zero); + read_device(i2c, 0xFC, 0, &zero); + i2c_unlock_adapter(i2c->adapter); return ret; } EXPORT_SYMBOL(pm860x_page_reg_write); @@ -169,18 +173,17 @@ EXPORT_SYMBOL(pm860x_page_reg_write); int pm860x_page_bulk_read(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { - struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char zero = 0; int ret; - mutex_lock(&chip->io_lock); - pm860x_write_device(i2c, 0xFA, 0, &zero); - pm860x_write_device(i2c, 0xFB, 0, &zero); - pm860x_write_device(i2c, 0xFF, 0, &zero); - ret = pm860x_read_device(i2c, reg, count, buf); - pm860x_write_device(i2c, 0xFE, 0, &zero); - pm860x_write_device(i2c, 0xFC, 0, &zero); - mutex_unlock(&chip->io_lock); + i2c_lock_adapter(i2c->adapter); + read_device(i2c, 0xfa, 0, &zero); + read_device(i2c, 0xfb, 0, &zero); + read_device(i2c, 0xff, 0, &zero); + ret = read_device(i2c, reg, count, buf); + read_device(i2c, 0xFE, 0, &zero); + read_device(i2c, 0xFC, 0, &zero); + i2c_unlock_adapter(i2c->adapter); return ret; } EXPORT_SYMBOL(pm860x_page_bulk_read); @@ -188,18 +191,18 @@ EXPORT_SYMBOL(pm860x_page_bulk_read); int pm860x_page_bulk_write(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { - struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char zero = 0; int ret; - mutex_lock(&chip->io_lock); - pm860x_write_device(i2c, 0xFA, 0, &zero); - pm860x_write_device(i2c, 0xFB, 0, &zero); - pm860x_write_device(i2c, 0xFF, 0, &zero); - ret = pm860x_write_device(i2c, reg, count, buf); - pm860x_write_device(i2c, 0xFE, 0, &zero); - pm860x_write_device(i2c, 0xFC, 0, &zero); - mutex_unlock(&chip->io_lock); + i2c_lock_adapter(i2c->adapter); + read_device(i2c, 0xFA, 0, &zero); + read_device(i2c, 0xFB, 0, &zero); + read_device(i2c, 0xFF, 0, &zero); + ret = write_device(i2c, reg, count, buf); + read_device(i2c, 0xFE, 0, &zero); + read_device(i2c, 0xFC, 0, &zero); + i2c_unlock_adapter(i2c->adapter); + i2c_unlock_adapter(i2c->adapter); return ret; } EXPORT_SYMBOL(pm860x_page_bulk_write); @@ -207,25 +210,24 @@ EXPORT_SYMBOL(pm860x_page_bulk_write); int pm860x_page_set_bits(struct i2c_client *i2c, int reg, unsigned char mask, unsigned char data) { - struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char zero; unsigned char value; int ret; - mutex_lock(&chip->io_lock); - pm860x_write_device(i2c, 0xFA, 0, &zero); - pm860x_write_device(i2c, 0xFB, 0, &zero); - pm860x_write_device(i2c, 0xFF, 0, &zero); - ret = pm860x_read_device(i2c, reg, 1, &value); + i2c_lock_adapter(i2c->adapter); + read_device(i2c, 0xFA, 0, &zero); + read_device(i2c, 0xFB, 0, &zero); + read_device(i2c, 0xFF, 0, &zero); + ret = read_device(i2c, reg, 1, &value); if (ret < 0) goto out; value &= ~mask; value |= data; - ret = pm860x_write_device(i2c, reg, 1, &value); + ret = write_device(i2c, reg, 1, &value); out: - pm860x_write_device(i2c, 0xFE, 0, &zero); - pm860x_write_device(i2c, 0xFC, 0, &zero); - mutex_unlock(&chip->io_lock); + read_device(i2c, 0xFE, 0, &zero); + read_device(i2c, 0xFC, 0, &zero); + i2c_unlock_adapter(i2c->adapter); return ret; } EXPORT_SYMBOL(pm860x_page_set_bits); @@ -257,11 +259,17 @@ static int verify_addr(struct i2c_client *i2c) return 0; } +static struct regmap_config pm860x_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + static int __devinit pm860x_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pm860x_platform_data *pdata = client->dev.platform_data; struct pm860x_chip *chip; + int ret; if (!pdata) { pr_info("No platform data in %s!\n", __func__); @@ -273,10 +281,17 @@ static int __devinit pm860x_probe(struct i2c_client *client, return -ENOMEM; chip->id = verify_addr(client); + chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config); + if (IS_ERR(chip->regmap)) { + ret = PTR_ERR(chip->regmap); + dev_err(&client->dev, "Failed to allocate register map: %d\n", + ret); + kfree(chip); + return ret; + } chip->client = client; i2c_set_clientdata(client, chip); chip->dev = &client->dev; - mutex_init(&chip->io_lock); dev_set_drvdata(chip->dev, chip); /* @@ -290,6 +305,14 @@ static int __devinit pm860x_probe(struct i2c_client *client, chip->companion_addr = pdata->companion_addr; chip->companion = i2c_new_dummy(chip->client->adapter, chip->companion_addr); + chip->regmap_companion = regmap_init_i2c(chip->companion, + &pm860x_regmap_config); + if (IS_ERR(chip->regmap_companion)) { + ret = PTR_ERR(chip->regmap_companion); + dev_err(&chip->companion->dev, + "Failed to allocate register map: %d\n", ret); + return ret; + } i2c_set_clientdata(chip->companion, chip); } @@ -302,7 +325,11 @@ static int __devexit pm860x_remove(struct i2c_client *client) struct pm860x_chip *chip = i2c_get_clientdata(client); pm860x_device_exit(chip); - i2c_unregister_device(chip->companion); + if (chip->companion) { + regmap_exit(chip->regmap_companion); + i2c_unregister_device(chip->companion); + } + regmap_exit(chip->regmap); kfree(chip); return 0; } diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 053208d31fb..cd13e9f2f5e 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -12,6 +12,7 @@ config MFD_CORE config MFD_88PM860X bool "Support Marvell 88PM8606/88PM8607" depends on I2C=y && GENERIC_HARDIRQS + select REGMAP_I2C select MFD_CORE help This supports for Marvell 88PM8606/88PM8607 Power Management IC. @@ -199,7 +200,7 @@ config MENELAUS config TWL4030_CORE bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y && GENERIC_HARDIRQS && IRQ_DOMAIN help Say yes here if you have TWL4030 / TWL6030 family chip on your board. This core driver provides register access and IRQ handling @@ -257,7 +258,7 @@ config TWL6040_CORE config MFD_STMPE bool "Support STMicroelectronics STMPE" - depends on I2C=y && GENERIC_HARDIRQS + depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS select MFD_CORE help Support for the STMPE family of I/O Expanders from @@ -278,6 +279,23 @@ config MFD_STMPE Keypad: stmpe-keypad Touchscreen: stmpe-ts +menu "STMPE Interface Drivers" +depends on MFD_STMPE + +config STMPE_I2C + bool "STMPE I2C Inteface" + depends on I2C=y + default y + help + This is used to enable I2C interface of STMPE + +config STMPE_SPI + bool "STMPE SPI Inteface" + depends on SPI_MASTER + help + This is used to enable SPI interface of STMPE +endmenu + config MFD_TC3589X bool "Support Toshiba TC35892 and variants" depends on I2C=y && GENERIC_HARDIRQS @@ -311,7 +329,7 @@ config MFD_TC6387XB config MFD_TC6393XB bool "Support Toshiba TC6393XB" - depends on GPIOLIB && ARM + depends on GPIOLIB && ARM && HAVE_CLK select MFD_CORE select MFD_TMIO help @@ -399,6 +417,17 @@ config MFD_MAX8998 additional drivers must be enabled in order to use the functionality of the device. +config MFD_S5M_CORE + bool "SAMSUNG S5M Series Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + Support for the Samsung Electronics S5M MFD series. + This driver provies common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device + config MFD_WM8400 tristate "Support Wolfson Microelectronics WM8400" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 47591fc7d0d..b953bab934f 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o obj-$(CONFIG_MFD_TI_SSP) += ti-ssp.o obj-$(CONFIG_MFD_STMPE) += stmpe.o +obj-$(CONFIG_STMPE_I2C) += stmpe-i2c.o +obj-$(CONFIG_STMPE_SPI) += stmpe-spi.o obj-$(CONFIG_MFD_TC3589X) += tc3589x.o obj-$(CONFIG_MFD_T7L66XB) += t7l66xb.o tmio_core.o obj-$(CONFIG_MFD_TC6387XB) += tc6387xb.o tmio_core.o @@ -109,3 +111,4 @@ obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o obj-$(CONFIG_TPS65911_COMPARATOR) += tps65911-comparator.o obj-$(CONFIG_MFD_AAT2870_CORE) += aat2870-core.o obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o +obj-$(CONFIG_MFD_S5M_CORE) += s5m-core.o s5m-irq.o diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c index 02c42015ba5..3aa36eb5c79 100644 --- a/drivers/mfd/aat2870-core.c +++ b/drivers/mfd/aat2870-core.c @@ -407,13 +407,13 @@ static int aat2870_i2c_probe(struct i2c_client *client, aat2870->init(aat2870); if (aat2870->en_pin >= 0) { - ret = gpio_request(aat2870->en_pin, "aat2870-en"); + ret = gpio_request_one(aat2870->en_pin, GPIOF_OUT_INIT_HIGH, + "aat2870-en"); if (ret < 0) { dev_err(&client->dev, "Failed to request GPIO %d\n", aat2870->en_pin); goto out_kfree; } - gpio_direction_output(aat2870->en_pin, 1); } aat2870_enable(aat2870); @@ -468,9 +468,10 @@ static int aat2870_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int aat2870_i2c_suspend(struct i2c_client *client, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int aat2870_i2c_suspend(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); struct aat2870_data *aat2870 = i2c_get_clientdata(client); aat2870_disable(aat2870); @@ -478,8 +479,9 @@ static int aat2870_i2c_suspend(struct i2c_client *client, pm_message_t state) return 0; } -static int aat2870_i2c_resume(struct i2c_client *client) +static int aat2870_i2c_resume(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); struct aat2870_data *aat2870 = i2c_get_clientdata(client); struct aat2870_register *reg = NULL; int i; @@ -495,12 +497,12 @@ static int aat2870_i2c_resume(struct i2c_client *client) return 0; } -#else -#define aat2870_i2c_suspend NULL -#define aat2870_i2c_resume NULL -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(aat2870_pm_ops, aat2870_i2c_suspend, + aat2870_i2c_resume); -static struct i2c_device_id aat2870_i2c_id_table[] = { +static const struct i2c_device_id aat2870_i2c_id_table[] = { { "aat2870", 0 }, { } }; @@ -510,11 +512,10 @@ static struct i2c_driver aat2870_i2c_driver = { .driver = { .name = "aat2870", .owner = THIS_MODULE, + .pm = &aat2870_pm_ops, }, .probe = aat2870_i2c_probe, .remove = aat2870_i2c_remove, - .suspend = aat2870_i2c_suspend, - .resume = aat2870_i2c_resume, .id_table = aat2870_i2c_id_table, }; diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c index ec10629a0b0..bd56a764dea 100644 --- a/drivers/mfd/ab5500-core.c +++ b/drivers/mfd/ab5500-core.c @@ -22,8 +22,8 @@ #include <linux/irq.h> #include <linux/interrupt.h> #include <linux/random.h> -#include <linux/mfd/ab5500/ab5500.h> #include <linux/mfd/abx500.h> +#include <linux/mfd/abx500/ab5500.h> #include <linux/list.h> #include <linux/bitops.h> #include <linux/spinlock.h> diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c index b7b2d3483fd..72006940937 100644 --- a/drivers/mfd/ab5500-debugfs.c +++ b/drivers/mfd/ab5500-debugfs.c @@ -7,8 +7,8 @@ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/seq_file.h> -#include <linux/mfd/ab5500/ab5500.h> #include <linux/mfd/abx500.h> +#include <linux/mfd/abx500/ab5500.h> #include <linux/uaccess.h> #include "ab5500-core.h" diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index d3d572b2317..53e2a80f42f 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -17,7 +17,7 @@ #include <linux/platform_device.h> #include <linux/mfd/core.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/regulator/ab8500.h> /* diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index dedb7f65cea..9a0211aa889 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -13,7 +13,7 @@ #include <linux/platform_device.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> static u32 debug_bank; static u32 debug_address; diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index e985d1701a8..c39fc716e1d 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -18,9 +18,9 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/mfd/ab8500.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500/gpadc.h> +#include <linux/mfd/abx500/ab8500.h> +#include <linux/mfd/abx500/ab8500-gpadc.h> /* * GPADC register offsets diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c index 9be541c6b00..087fecd71ce 100644 --- a/drivers/mfd/ab8500-i2c.c +++ b/drivers/mfd/ab8500-i2c.c @@ -10,7 +10,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/platform_device.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/mfd/db8500-prcmu.h> static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data) diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index f20feefac19..c28d4eb1eff 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -7,9 +7,9 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/platform_device.h> -#include <linux/mfd/ab8500.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500/sysctrl.h> +#include <linux/mfd/abx500/ab8500.h> +#include <linux/mfd/abx500/ab8500-sysctrl.h> static struct device *sysctrl_dev; diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c index 155fa040788..315fef5d466 100644 --- a/drivers/mfd/cs5535-mfd.c +++ b/drivers/mfd/cs5535-mfd.c @@ -172,14 +172,14 @@ static void __devexit cs5535_mfd_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static struct pci_device_id cs5535_mfd_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(cs5535_mfd_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) }, { 0, } }; MODULE_DEVICE_TABLE(pci, cs5535_mfd_pci_tbl); -static struct pci_driver cs5535_mfd_drv = { +static struct pci_driver cs5535_mfd_driver = { .name = DRV_NAME, .id_table = cs5535_mfd_pci_tbl, .probe = cs5535_mfd_probe, @@ -188,12 +188,12 @@ static struct pci_driver cs5535_mfd_drv = { static int __init cs5535_mfd_init(void) { - return pci_register_driver(&cs5535_mfd_drv); + return pci_register_driver(&cs5535_mfd_driver); } static void __exit cs5535_mfd_exit(void) { - pci_unregister_driver(&cs5535_mfd_drv); + pci_unregister_driver(&cs5535_mfd_driver); } module_init(cs5535_mfd_init); diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c index 8ad88da647b..7710227d284 100644 --- a/drivers/mfd/dm355evm_msp.c +++ b/drivers/mfd/dm355evm_msp.c @@ -308,8 +308,7 @@ static int add_children(struct i2c_client *client) for (i = 0; i < ARRAY_SIZE(config_inputs); i++) { int gpio = dm355evm_msp_gpio.base + config_inputs[i].offset; - gpio_request(gpio, config_inputs[i].label); - gpio_direction_input(gpio); + gpio_request_one(gpio, GPIOF_IN, config_inputs[i].label); /* make it easy for userspace to see these */ gpio_export(gpio, false); diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c index 97c27762174..b76657eb0c5 100644 --- a/drivers/mfd/intel_msic.c +++ b/drivers/mfd/intel_msic.c @@ -485,17 +485,7 @@ static struct platform_driver intel_msic_driver = { }, }; -static int __init intel_msic_init(void) -{ - return platform_driver_register(&intel_msic_driver); -} -module_init(intel_msic_init); - -static void __exit intel_msic_exit(void) -{ - platform_driver_unregister(&intel_msic_driver); -} -module_exit(intel_msic_exit); +module_platform_driver(intel_msic_driver); MODULE_DESCRIPTION("Driver for Intel MSIC"); MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>"); diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c index ef39528088f..87662a17dec 100644 --- a/drivers/mfd/jz4740-adc.c +++ b/drivers/mfd/jz4740-adc.c @@ -181,7 +181,7 @@ static struct resource jz4740_battery_resources[] = { }, }; -const struct mfd_cell jz4740_adc_cells[] = { +static struct mfd_cell jz4740_adc_cells[] = { { .id = 0, .name = "jz4740-hwmon", @@ -338,17 +338,7 @@ static struct platform_driver jz4740_adc_driver = { }, }; -static int __init jz4740_adc_init(void) -{ - return platform_driver_register(&jz4740_adc_driver); -} -module_init(jz4740_adc_init); - -static void __exit jz4740_adc_exit(void) -{ - platform_driver_unregister(&jz4740_adc_driver); -} -module_exit(jz4740_adc_exit); +module_platform_driver(jz4740_adc_driver); MODULE_DESCRIPTION("JZ4740 SoC ADC driver"); MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c index ea1169b0477..abc421364a4 100644 --- a/drivers/mfd/lpc_sch.c +++ b/drivers/mfd/lpc_sch.c @@ -74,7 +74,7 @@ static struct mfd_cell tunnelcreek_cells[] = { }, }; -static struct pci_device_id lpc_sch_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(lpc_sch_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ITC_LPC) }, { 0, } diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c index 0219115e00c..d9e4b36edee 100644 --- a/drivers/mfd/max8925-i2c.c +++ b/drivers/mfd/max8925-i2c.c @@ -161,6 +161,8 @@ static int __devinit max8925_probe(struct i2c_client *client, chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR); i2c_set_clientdata(chip->adc, chip); + device_init_wakeup(&client->dev, 1); + max8925_device_init(chip, pdata); return 0; @@ -177,10 +179,35 @@ static int __devexit max8925_remove(struct i2c_client *client) return 0; } +#ifdef CONFIG_PM_SLEEP +static int max8925_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct max8925_chip *chip = i2c_get_clientdata(client); + + if (device_may_wakeup(dev) && chip->wakeup_flag) + enable_irq_wake(chip->core_irq); + return 0; +} + +static int max8925_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct max8925_chip *chip = i2c_get_clientdata(client); + + if (device_may_wakeup(dev) && chip->wakeup_flag) + disable_irq_wake(chip->core_irq); + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(max8925_pm_ops, max8925_suspend, max8925_resume); + static struct i2c_driver max8925_driver = { .driver = { .name = "max8925", .owner = THIS_MODULE, + .pm = &max8925_pm_ops, }, .probe = max8925_probe, .remove = __devexit_p(max8925_remove), diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c index 5be53ae9b61..cb83a7ab53e 100644 --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@ -43,7 +43,8 @@ static struct mfd_cell max8997_devs[] = { { .name = "max8997-battery", }, { .name = "max8997-haptic", }, { .name = "max8997-muic", }, - { .name = "max8997-flash", }, + { .name = "max8997-led", .id = 1 }, + { .name = "max8997-led", .id = 2 }, }; int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index de4096aee24..6ef56d28c05 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -176,6 +176,8 @@ static int max8998_i2c_probe(struct i2c_client *i2c, if (ret < 0) goto err; + device_init_wakeup(max8998->dev, max8998->wakeup); + return ret; err: @@ -210,7 +212,7 @@ static int max8998_suspend(struct device *dev) struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); struct max8998_dev *max8998 = i2c_get_clientdata(i2c); - if (max8998->wakeup) + if (device_may_wakeup(dev)) irq_set_irq_wake(max8998->irq, 1); return 0; } @@ -220,7 +222,7 @@ static int max8998_resume(struct device *dev) struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); struct max8998_dev *max8998 = i2c_get_clientdata(i2c); - if (max8998->wakeup) + if (device_may_wakeup(dev)) irq_set_irq_wake(max8998->irq, 0); /* * In LP3974, if IRQ registers are not "read & clear" diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index e9619acc023..7122386b4e3 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -18,11 +18,15 @@ #include <linux/spi/spi.h> #include <linux/mfd/core.h> #include <linux/mfd/mc13xxx.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> struct mc13xxx { struct spi_device *spidev; struct mutex lock; int irq; + int flags; irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; void *irqdata[MC13XXX_NUM_IRQ]; @@ -550,10 +554,7 @@ static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx) int mc13xxx_get_flags(struct mc13xxx *mc13xxx) { - struct mc13xxx_platform_data *pdata = - dev_get_platdata(&mc13xxx->spidev->dev); - - return pdata->flags; + return mc13xxx->flags; } EXPORT_SYMBOL(mc13xxx_get_flags); @@ -615,13 +616,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, break; case MC13XXX_ADC_MODE_SINGLE_CHAN: - adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK; + adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK; adc1 |= (channel & 0x7) << MC13XXX_ADC1_CHAN0_SHIFT; adc1 |= MC13XXX_ADC1_RAND; break; case MC13XXX_ADC_MODE_MULT_CHAN: - adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK; + adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK; adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT; break; @@ -696,17 +697,67 @@ static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) return mc13xxx_add_subdevice_pdata(mc13xxx, format, NULL, 0); } +#ifdef CONFIG_OF +static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) +{ + struct device_node *np = mc13xxx->spidev->dev.of_node; + + if (!np) + return -ENODEV; + + if (of_get_property(np, "fsl,mc13xxx-uses-adc", NULL)) + mc13xxx->flags |= MC13XXX_USE_ADC; + + if (of_get_property(np, "fsl,mc13xxx-uses-codec", NULL)) + mc13xxx->flags |= MC13XXX_USE_CODEC; + + if (of_get_property(np, "fsl,mc13xxx-uses-rtc", NULL)) + mc13xxx->flags |= MC13XXX_USE_RTC; + + if (of_get_property(np, "fsl,mc13xxx-uses-touch", NULL)) + mc13xxx->flags |= MC13XXX_USE_TOUCHSCREEN; + + return 0; +} +#else +static inline int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) +{ + return -ENODEV; +} +#endif + +static const struct spi_device_id mc13xxx_device_id[] = { + { + .name = "mc13783", + .driver_data = MC13XXX_ID_MC13783, + }, { + .name = "mc13892", + .driver_data = MC13XXX_ID_MC13892, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(spi, mc13xxx_device_id); + +static const struct of_device_id mc13xxx_dt_ids[] = { + { .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, }, + { .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); + static int mc13xxx_probe(struct spi_device *spi) { + const struct of_device_id *of_id; + struct spi_driver *sdrv = to_spi_driver(spi->dev.driver); struct mc13xxx *mc13xxx; struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); enum mc13xxx_id id; int ret; - if (!pdata) { - dev_err(&spi->dev, "invalid platform data\n"); - return -EINVAL; - } + of_id = of_match_device(mc13xxx_dt_ids, &spi->dev); + if (of_id) + sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data]; mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); if (!mc13xxx) @@ -749,28 +800,33 @@ err_revision: mc13xxx_unlock(mc13xxx); - if (pdata->flags & MC13XXX_USE_ADC) + if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata) + mc13xxx->flags = pdata->flags; + + if (mc13xxx->flags & MC13XXX_USE_ADC) mc13xxx_add_subdevice(mc13xxx, "%s-adc"); - if (pdata->flags & MC13XXX_USE_CODEC) + if (mc13xxx->flags & MC13XXX_USE_CODEC) mc13xxx_add_subdevice(mc13xxx, "%s-codec"); - mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", - &pdata->regulators, sizeof(pdata->regulators)); - - if (pdata->flags & MC13XXX_USE_RTC) + if (mc13xxx->flags & MC13XXX_USE_RTC) mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); - if (pdata->flags & MC13XXX_USE_TOUCHSCREEN) + if (mc13xxx->flags & MC13XXX_USE_TOUCHSCREEN) mc13xxx_add_subdevice(mc13xxx, "%s-ts"); - if (pdata->leds) + if (pdata) { + mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", + &pdata->regulators, sizeof(pdata->regulators)); mc13xxx_add_subdevice_pdata(mc13xxx, "%s-led", pdata->leds, sizeof(*pdata->leds)); - - if (pdata->buttons) mc13xxx_add_subdevice_pdata(mc13xxx, "%s-pwrbutton", pdata->buttons, sizeof(*pdata->buttons)); + } else { + mc13xxx_add_subdevice(mc13xxx, "%s-regulator"); + mc13xxx_add_subdevice(mc13xxx, "%s-led"); + mc13xxx_add_subdevice(mc13xxx, "%s-pwrbutton"); + } return 0; } @@ -788,25 +844,12 @@ static int __devexit mc13xxx_remove(struct spi_device *spi) return 0; } -static const struct spi_device_id mc13xxx_device_id[] = { - { - .name = "mc13783", - .driver_data = MC13XXX_ID_MC13783, - }, { - .name = "mc13892", - .driver_data = MC13XXX_ID_MC13892, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(spi, mc13xxx_device_id); - static struct spi_driver mc13xxx_driver = { .id_table = mc13xxx_device_id, .driver = { .name = "mc13xxx", - .bus = &spi_bus_type, .owner = THIS_MODULE, + .of_match_table = mc13xxx_dt_ids, }, .probe = mc13xxx_probe, .remove = __devexit_p(mc13xxx_remove), diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 84815f9ef63..63be60bc345 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -26,9 +26,35 @@ #define to_mcp(d) container_of(d, struct mcp, attached_device) #define to_mcp_driver(d) container_of(d, struct mcp_driver, drv) +static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id, + const char *codec) +{ + while (id->name[0]) { + if (strcmp(codec, id->name) == 0) + return id; + id++; + } + return NULL; +} + +const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp) +{ + const struct mcp_driver *driver = + to_mcp_driver(mcp->attached_device.driver); + + return mcp_match_id(driver->id_table, mcp->codec); +} +EXPORT_SYMBOL(mcp_get_device_id); + static int mcp_bus_match(struct device *dev, struct device_driver *drv) { - return 1; + const struct mcp *mcp = to_mcp(dev); + const struct mcp_driver *driver = to_mcp_driver(drv); + + if (driver->id_table) + return !!mcp_match_id(driver->id_table, mcp->codec); + + return 0; } static int mcp_bus_probe(struct device *dev) @@ -74,9 +100,18 @@ static int mcp_bus_resume(struct device *dev) return ret; } +static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct mcp *mcp = to_mcp(dev); + + add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec); + return 0; +} + static struct bus_type mcp_bus_type = { .name = "mcp", .match = mcp_bus_match, + .uevent = mcp_bus_uevent, .probe = mcp_bus_probe, .remove = mcp_bus_remove, .suspend = mcp_bus_suspend, @@ -212,9 +247,14 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size) } EXPORT_SYMBOL(mcp_host_alloc); -int mcp_host_register(struct mcp *mcp) +int mcp_host_register(struct mcp *mcp, void *pdata) { + if (!mcp->codec) + return -EINVAL; + + mcp->attached_device.platform_data = pdata; dev_set_name(&mcp->attached_device, "mcp0"); + request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec); return device_register(&mcp->attached_device); } EXPORT_SYMBOL(mcp_host_register); diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 2dab02d9ac8..9adc2eb6949 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/platform_device.h> #include <linux/mfd/mcp.h> +#include <linux/io.h> #include <mach/dma.h> #include <mach/hardware.h> @@ -26,12 +27,19 @@ #include <asm/system.h> #include <mach/mcp.h> -#include <mach/assabet.h> - +/* Register offsets */ +#define MCCR0 0x00 +#define MCDR0 0x08 +#define MCDR1 0x0C +#define MCDR2 0x10 +#define MCSR 0x18 +#define MCCR1 0x00 struct mcp_sa11x0 { - u32 mccr0; - u32 mccr1; + u32 mccr0; + u32 mccr1; + unsigned char *mccr0_base; + unsigned char *mccr1_base; }; #define priv(mcp) ((struct mcp_sa11x0 *)mcp_priv(mcp)) @@ -39,25 +47,25 @@ struct mcp_sa11x0 { static void mcp_sa11x0_set_telecom_divisor(struct mcp *mcp, unsigned int divisor) { - unsigned int mccr0; + struct mcp_sa11x0 *priv = priv(mcp); divisor /= 32; - mccr0 = Ser4MCCR0 & ~0x00007f00; - mccr0 |= divisor << 8; - Ser4MCCR0 = mccr0; + priv->mccr0 &= ~0x00007f00; + priv->mccr0 |= divisor << 8; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); } static void mcp_sa11x0_set_audio_divisor(struct mcp *mcp, unsigned int divisor) { - unsigned int mccr0; + struct mcp_sa11x0 *priv = priv(mcp); divisor /= 32; - mccr0 = Ser4MCCR0 & ~0x0000007f; - mccr0 |= divisor; - Ser4MCCR0 = mccr0; + priv->mccr0 &= ~0x0000007f; + priv->mccr0 |= divisor; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); } /* @@ -71,12 +79,16 @@ mcp_sa11x0_write(struct mcp *mcp, unsigned int reg, unsigned int val) { int ret = -ETIME; int i; + u32 mcpreg; + struct mcp_sa11x0 *priv = priv(mcp); - Ser4MCDR2 = reg << 17 | MCDR2_Wr | (val & 0xffff); + mcpreg = reg << 17 | MCDR2_Wr | (val & 0xffff); + __raw_writel(mcpreg, priv->mccr0_base + MCDR2); for (i = 0; i < 2; i++) { udelay(mcp->rw_timeout); - if (Ser4MCSR & MCSR_CWC) { + mcpreg = __raw_readl(priv->mccr0_base + MCSR); + if (mcpreg & MCSR_CWC) { ret = 0; break; } @@ -97,13 +109,18 @@ mcp_sa11x0_read(struct mcp *mcp, unsigned int reg) { int ret = -ETIME; int i; + u32 mcpreg; + struct mcp_sa11x0 *priv = priv(mcp); - Ser4MCDR2 = reg << 17 | MCDR2_Rd; + mcpreg = reg << 17 | MCDR2_Rd; + __raw_writel(mcpreg, priv->mccr0_base + MCDR2); for (i = 0; i < 2; i++) { udelay(mcp->rw_timeout); - if (Ser4MCSR & MCSR_CRC) { - ret = Ser4MCDR2 & 0xffff; + mcpreg = __raw_readl(priv->mccr0_base + MCSR); + if (mcpreg & MCSR_CRC) { + ret = __raw_readl(priv->mccr0_base + MCDR2) + & 0xffff; break; } } @@ -116,13 +133,19 @@ mcp_sa11x0_read(struct mcp *mcp, unsigned int reg) static void mcp_sa11x0_enable(struct mcp *mcp) { - Ser4MCSR = -1; - Ser4MCCR0 |= MCCR0_MCE; + struct mcp_sa11x0 *priv = priv(mcp); + + __raw_writel(-1, priv->mccr0_base + MCSR); + priv->mccr0 |= MCCR0_MCE; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); } static void mcp_sa11x0_disable(struct mcp *mcp) { - Ser4MCCR0 &= ~MCCR0_MCE; + struct mcp_sa11x0 *priv = priv(mcp); + + priv->mccr0 &= ~MCCR0_MCE; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); } /* @@ -142,50 +165,69 @@ static int mcp_sa11x0_probe(struct platform_device *pdev) struct mcp_plat_data *data = pdev->dev.platform_data; struct mcp *mcp; int ret; + struct mcp_sa11x0 *priv; + struct resource *res_mem0, *res_mem1; + u32 size0, size1; if (!data) return -ENODEV; - if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp")) + if (!data->codec) + return -ENODEV; + + res_mem0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res_mem0) + return -ENODEV; + size0 = res_mem0->end - res_mem0->start + 1; + + res_mem1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res_mem1) + return -ENODEV; + size1 = res_mem1->end - res_mem1->start + 1; + + if (!request_mem_region(res_mem0->start, size0, "sa11x0-mcp")) return -EBUSY; + if (!request_mem_region(res_mem1->start, size1, "sa11x0-mcp")) { + ret = -EBUSY; + goto release; + } + mcp = mcp_host_alloc(&pdev->dev, sizeof(struct mcp_sa11x0)); if (!mcp) { ret = -ENOMEM; - goto release; + goto release2; } + priv = priv(mcp); + mcp->owner = THIS_MODULE; mcp->ops = &mcp_sa11x0; mcp->sclk_rate = data->sclk_rate; - mcp->dma_audio_rd = DMA_Ser4MCP0Rd; - mcp->dma_audio_wr = DMA_Ser4MCP0Wr; - mcp->dma_telco_rd = DMA_Ser4MCP1Rd; - mcp->dma_telco_wr = DMA_Ser4MCP1Wr; - mcp->gpio_base = data->gpio_base; + mcp->dma_audio_rd = DDAR_DevAdd(res_mem0->start + MCDR0) + + DDAR_DevRd + DDAR_Brst4 + DDAR_8BitDev; + mcp->dma_audio_wr = DDAR_DevAdd(res_mem0->start + MCDR0) + + DDAR_DevWr + DDAR_Brst4 + DDAR_8BitDev; + mcp->dma_telco_rd = DDAR_DevAdd(res_mem0->start + MCDR1) + + DDAR_DevRd + DDAR_Brst4 + DDAR_8BitDev; + mcp->dma_telco_wr = DDAR_DevAdd(res_mem0->start + MCDR1) + + DDAR_DevWr + DDAR_Brst4 + DDAR_8BitDev; + mcp->codec = data->codec; platform_set_drvdata(pdev, mcp); - if (machine_is_assabet()) { - ASSABET_BCR_set(ASSABET_BCR_CODEC_RST); - } - - /* - * Setup the PPC unit correctly. - */ - PPDR &= ~PPC_RXD4; - PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PSDR |= PPC_RXD4; - PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); - PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); - /* * Initialise device. Note that we initially * set the sampling rate to minimum. */ - Ser4MCSR = -1; - Ser4MCCR1 = data->mccr1; - Ser4MCCR0 = data->mccr0 | 0x7f7f; + priv->mccr0_base = ioremap(res_mem0->start, size0); + priv->mccr1_base = ioremap(res_mem1->start, size1); + + __raw_writel(-1, priv->mccr0_base + MCSR); + priv->mccr1 = data->mccr1; + priv->mccr0 = data->mccr0 | 0x7f7f; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); + __raw_writel(priv->mccr1, priv->mccr1_base + MCCR1); /* * Calculate the read/write timeout (us) from the bit clock @@ -195,36 +237,53 @@ static int mcp_sa11x0_probe(struct platform_device *pdev) mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) / mcp->sclk_rate; - ret = mcp_host_register(mcp); + ret = mcp_host_register(mcp, data->codec_pdata); if (ret == 0) goto out; + release2: + release_mem_region(res_mem1->start, size1); release: - release_mem_region(0x80060000, 0x60); + release_mem_region(res_mem0->start, size0); platform_set_drvdata(pdev, NULL); out: return ret; } -static int mcp_sa11x0_remove(struct platform_device *dev) +static int mcp_sa11x0_remove(struct platform_device *pdev) { - struct mcp *mcp = platform_get_drvdata(dev); + struct mcp *mcp = platform_get_drvdata(pdev); + struct mcp_sa11x0 *priv = priv(mcp); + struct resource *res_mem; + u32 size; - platform_set_drvdata(dev, NULL); + platform_set_drvdata(pdev, NULL); mcp_host_unregister(mcp); - release_mem_region(0x80060000, 0x60); + res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res_mem) { + size = res_mem->end - res_mem->start + 1; + release_mem_region(res_mem->start, size); + } + res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res_mem) { + size = res_mem->end - res_mem->start + 1; + release_mem_region(res_mem->start, size); + } + iounmap(priv->mccr0_base); + iounmap(priv->mccr1_base); return 0; } static int mcp_sa11x0_suspend(struct platform_device *dev, pm_message_t state) { struct mcp *mcp = platform_get_drvdata(dev); + struct mcp_sa11x0 *priv = priv(mcp); + u32 mccr0; - priv(mcp)->mccr0 = Ser4MCCR0; - priv(mcp)->mccr1 = Ser4MCCR1; - Ser4MCCR0 &= ~MCCR0_MCE; + mccr0 = priv->mccr0 & ~MCCR0_MCE; + __raw_writel(mccr0, priv->mccr0_base + MCCR0); return 0; } @@ -232,9 +291,10 @@ static int mcp_sa11x0_suspend(struct platform_device *dev, pm_message_t state) static int mcp_sa11x0_resume(struct platform_device *dev) { struct mcp *mcp = platform_get_drvdata(dev); + struct mcp_sa11x0 *priv = priv(mcp); - Ser4MCCR1 = priv(mcp)->mccr1; - Ser4MCCR0 = priv(mcp)->mccr0; + __raw_writel(priv->mccr0, priv->mccr0_base + MCCR0); + __raw_writel(priv->mccr1, priv->mccr1_base + MCCR1); return 0; } @@ -251,24 +311,14 @@ static struct platform_driver mcp_sa11x0_driver = { .resume = mcp_sa11x0_resume, .driver = { .name = "sa11x0-mcp", + .owner = THIS_MODULE, }, }; /* * This needs re-working */ -static int __init mcp_sa11x0_init(void) -{ - return platform_driver_register(&mcp_sa11x0_driver); -} - -static void __exit mcp_sa11x0_exit(void) -{ - platform_driver_unregister(&mcp_sa11x0_driver); -} - -module_init(mcp_sa11x0_init); -module_exit(mcp_sa11x0_exit); +module_platform_driver(mcp_sa11x0_driver); MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>"); MODULE_DESCRIPTION("SA11x0 multimedia communications port driver"); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 3f565ef3e14..68ac2c55d5a 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -503,19 +503,13 @@ static void omap_usbhs_init(struct device *dev) spin_lock_irqsave(&omap->lock, flags); if (pdata->ehci_data->phy_reset) { - if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0])) { - gpio_request(pdata->ehci_data->reset_gpio_port[0], - "USB1 PHY reset"); - gpio_direction_output - (pdata->ehci_data->reset_gpio_port[0], 0); - } + if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0])) + gpio_request_one(pdata->ehci_data->reset_gpio_port[0], + GPIOF_OUT_INIT_LOW, "USB1 PHY reset"); - if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) { - gpio_request(pdata->ehci_data->reset_gpio_port[1], - "USB2 PHY reset"); - gpio_direction_output - (pdata->ehci_data->reset_gpio_port[1], 0); - } + if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) + gpio_request_one(pdata->ehci_data->reset_gpio_port[1], + GPIOF_OUT_INIT_LOW, "USB2 PHY reset"); /* Hold the PHY in RESET for enough time till DIR is high */ udelay(10); diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c index aed0d2a9b03..3927c17e417 100644 --- a/drivers/mfd/pcf50633-adc.c +++ b/drivers/mfd/pcf50633-adc.c @@ -249,17 +249,7 @@ static struct platform_driver pcf50633_adc_driver = { .remove = __devexit_p(pcf50633_adc_remove), }; -static int __init pcf50633_adc_init(void) -{ - return platform_driver_register(&pcf50633_adc_driver); -} -module_init(pcf50633_adc_init); - -static void __exit pcf50633_adc_exit(void) -{ - platform_driver_unregister(&pcf50633_adc_driver); -} -module_exit(pcf50633_adc_exit); +module_platform_driver(pcf50633_adc_driver); MODULE_AUTHOR("Balaji Rao <balajirrao@openmoko.org>"); MODULE_DESCRIPTION("PCF50633 adc driver"); diff --git a/drivers/mfd/s5m-core.c b/drivers/mfd/s5m-core.c new file mode 100644 index 00000000000..e075c113eec --- /dev/null +++ b/drivers/mfd/s5m-core.c @@ -0,0 +1,176 @@ +/* + * s5m87xx.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/pm_runtime.h> +#include <linux/mutex.h> +#include <linux/mfd/core.h> +#include <linux/mfd/s5m87xx/s5m-core.h> +#include <linux/mfd/s5m87xx/s5m-pmic.h> +#include <linux/mfd/s5m87xx/s5m-rtc.h> +#include <linux/regmap.h> + +static struct mfd_cell s5m87xx_devs[] = { + { + .name = "s5m8767-pmic", + }, { + .name = "s5m-rtc", + }, +}; + +int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest) +{ + return regmap_read(s5m87xx->regmap, reg, dest); +} +EXPORT_SYMBOL_GPL(s5m_reg_read); + +int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf) +{ + return regmap_bulk_read(s5m87xx->regmap, reg, buf, count);; +} +EXPORT_SYMBOL_GPL(s5m_bulk_read); + +int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value) +{ + return regmap_write(s5m87xx->regmap, reg, value); +} +EXPORT_SYMBOL_GPL(s5m_reg_write); + +int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf) +{ + return regmap_raw_write(s5m87xx->regmap, reg, buf, count * sizeof(u16)); +} +EXPORT_SYMBOL_GPL(s5m_bulk_write); + +int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask) +{ + return regmap_update_bits(s5m87xx->regmap, reg, mask, val); +} +EXPORT_SYMBOL_GPL(s5m_reg_update); + +static struct regmap_config s5m_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int s5m87xx_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct s5m_platform_data *pdata = i2c->dev.platform_data; + struct s5m87xx_dev *s5m87xx; + int ret = 0; + int error; + + s5m87xx = kzalloc(sizeof(struct s5m87xx_dev), GFP_KERNEL); + if (s5m87xx == NULL) + return -ENOMEM; + + i2c_set_clientdata(i2c, s5m87xx); + s5m87xx->dev = &i2c->dev; + s5m87xx->i2c = i2c; + s5m87xx->irq = i2c->irq; + s5m87xx->type = id->driver_data; + + if (pdata) { + s5m87xx->device_type = pdata->device_type; + s5m87xx->ono = pdata->ono; + s5m87xx->irq_base = pdata->irq_base; + s5m87xx->wakeup = pdata->wakeup; + } + + s5m87xx->regmap = regmap_init_i2c(i2c, &s5m_regmap_config); + if (IS_ERR(s5m87xx->regmap)) { + error = PTR_ERR(s5m87xx->regmap); + dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + error); + goto err; + } + + s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); + i2c_set_clientdata(s5m87xx->rtc, s5m87xx); + + if (pdata->cfg_pmic_irq) + pdata->cfg_pmic_irq(); + + s5m_irq_init(s5m87xx); + + pm_runtime_set_active(s5m87xx->dev); + + ret = mfd_add_devices(s5m87xx->dev, -1, + s5m87xx_devs, ARRAY_SIZE(s5m87xx_devs), + NULL, 0); + + if (ret < 0) + goto err; + + return ret; + +err: + mfd_remove_devices(s5m87xx->dev); + s5m_irq_exit(s5m87xx); + i2c_unregister_device(s5m87xx->rtc); + regmap_exit(s5m87xx->regmap); + kfree(s5m87xx); + return ret; +} + +static int s5m87xx_i2c_remove(struct i2c_client *i2c) +{ + struct s5m87xx_dev *s5m87xx = i2c_get_clientdata(i2c); + + mfd_remove_devices(s5m87xx->dev); + s5m_irq_exit(s5m87xx); + i2c_unregister_device(s5m87xx->rtc); + regmap_exit(s5m87xx->regmap); + kfree(s5m87xx); + return 0; +} + +static const struct i2c_device_id s5m87xx_i2c_id[] = { + { "s5m87xx", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, s5m87xx_i2c_id); + +static struct i2c_driver s5m87xx_i2c_driver = { + .driver = { + .name = "s5m87xx", + .owner = THIS_MODULE, + }, + .probe = s5m87xx_i2c_probe, + .remove = s5m87xx_i2c_remove, + .id_table = s5m87xx_i2c_id, +}; + +static int __init s5m87xx_i2c_init(void) +{ + return i2c_add_driver(&s5m87xx_i2c_driver); +} + +subsys_initcall(s5m87xx_i2c_init); + +static void __exit s5m87xx_i2c_exit(void) +{ + i2c_del_driver(&s5m87xx_i2c_driver); +} +module_exit(s5m87xx_i2c_exit); + +MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>"); +MODULE_DESCRIPTION("Core support for the S5M MFD"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/s5m-irq.c b/drivers/mfd/s5m-irq.c new file mode 100644 index 00000000000..de76dfb6f0a --- /dev/null +++ b/drivers/mfd/s5m-irq.c @@ -0,0 +1,487 @@ +/* + * s5m-irq.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/device.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mfd/s5m87xx/s5m-core.h> + +struct s5m_irq_data { + int reg; + int mask; +}; + +static struct s5m_irq_data s5m8767_irqs[] = { + [S5M8767_IRQ_PWRR] = { + .reg = 1, + .mask = S5M8767_IRQ_PWRR_MASK, + }, + [S5M8767_IRQ_PWRF] = { + .reg = 1, + .mask = S5M8767_IRQ_PWRF_MASK, + }, + [S5M8767_IRQ_PWR1S] = { + .reg = 1, + .mask = S5M8767_IRQ_PWR1S_MASK, + }, + [S5M8767_IRQ_JIGR] = { + .reg = 1, + .mask = S5M8767_IRQ_JIGR_MASK, + }, + [S5M8767_IRQ_JIGF] = { + .reg = 1, + .mask = S5M8767_IRQ_JIGF_MASK, + }, + [S5M8767_IRQ_LOWBAT2] = { + .reg = 1, + .mask = S5M8767_IRQ_LOWBAT2_MASK, + }, + [S5M8767_IRQ_LOWBAT1] = { + .reg = 1, + .mask = S5M8767_IRQ_LOWBAT1_MASK, + }, + [S5M8767_IRQ_MRB] = { + .reg = 2, + .mask = S5M8767_IRQ_MRB_MASK, + }, + [S5M8767_IRQ_DVSOK2] = { + .reg = 2, + .mask = S5M8767_IRQ_DVSOK2_MASK, + }, + [S5M8767_IRQ_DVSOK3] = { + .reg = 2, + .mask = S5M8767_IRQ_DVSOK3_MASK, + }, + [S5M8767_IRQ_DVSOK4] = { + .reg = 2, + .mask = S5M8767_IRQ_DVSOK4_MASK, + }, + [S5M8767_IRQ_RTC60S] = { + .reg = 3, + .mask = S5M8767_IRQ_RTC60S_MASK, + }, + [S5M8767_IRQ_RTCA1] = { + .reg = 3, + .mask = S5M8767_IRQ_RTCA1_MASK, + }, + [S5M8767_IRQ_RTCA2] = { + .reg = 3, + .mask = S5M8767_IRQ_RTCA2_MASK, + }, + [S5M8767_IRQ_SMPL] = { + .reg = 3, + .mask = S5M8767_IRQ_SMPL_MASK, + }, + [S5M8767_IRQ_RTC1S] = { + .reg = 3, + .mask = S5M8767_IRQ_RTC1S_MASK, + }, + [S5M8767_IRQ_WTSR] = { + .reg = 3, + .mask = S5M8767_IRQ_WTSR_MASK, + }, +}; + +static struct s5m_irq_data s5m8763_irqs[] = { + [S5M8763_IRQ_DCINF] = { + .reg = 1, + .mask = S5M8763_IRQ_DCINF_MASK, + }, + [S5M8763_IRQ_DCINR] = { + .reg = 1, + .mask = S5M8763_IRQ_DCINR_MASK, + }, + [S5M8763_IRQ_JIGF] = { + .reg = 1, + .mask = S5M8763_IRQ_JIGF_MASK, + }, + [S5M8763_IRQ_JIGR] = { + .reg = 1, + .mask = S5M8763_IRQ_JIGR_MASK, + }, + [S5M8763_IRQ_PWRONF] = { + .reg = 1, + .mask = S5M8763_IRQ_PWRONF_MASK, + }, + [S5M8763_IRQ_PWRONR] = { + .reg = 1, + .mask = S5M8763_IRQ_PWRONR_MASK, + }, + [S5M8763_IRQ_WTSREVNT] = { + .reg = 2, + .mask = S5M8763_IRQ_WTSREVNT_MASK, + }, + [S5M8763_IRQ_SMPLEVNT] = { + .reg = 2, + .mask = S5M8763_IRQ_SMPLEVNT_MASK, + }, + [S5M8763_IRQ_ALARM1] = { + .reg = 2, + .mask = S5M8763_IRQ_ALARM1_MASK, + }, + [S5M8763_IRQ_ALARM0] = { + .reg = 2, + .mask = S5M8763_IRQ_ALARM0_MASK, + }, + [S5M8763_IRQ_ONKEY1S] = { + .reg = 3, + .mask = S5M8763_IRQ_ONKEY1S_MASK, + }, + [S5M8763_IRQ_TOPOFFR] = { + .reg = 3, + .mask = S5M8763_IRQ_TOPOFFR_MASK, + }, + [S5M8763_IRQ_DCINOVPR] = { + .reg = 3, + .mask = S5M8763_IRQ_DCINOVPR_MASK, + }, + [S5M8763_IRQ_CHGRSTF] = { + .reg = 3, + .mask = S5M8763_IRQ_CHGRSTF_MASK, + }, + [S5M8763_IRQ_DONER] = { + .reg = 3, + .mask = S5M8763_IRQ_DONER_MASK, + }, + [S5M8763_IRQ_CHGFAULT] = { + .reg = 3, + .mask = S5M8763_IRQ_CHGFAULT_MASK, + }, + [S5M8763_IRQ_LOBAT1] = { + .reg = 4, + .mask = S5M8763_IRQ_LOBAT1_MASK, + }, + [S5M8763_IRQ_LOBAT2] = { + .reg = 4, + .mask = S5M8763_IRQ_LOBAT2_MASK, + }, +}; + +static inline struct s5m_irq_data * +irq_to_s5m8767_irq(struct s5m87xx_dev *s5m87xx, int irq) +{ + return &s5m8767_irqs[irq - s5m87xx->irq_base]; +} + +static void s5m8767_irq_lock(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + + mutex_lock(&s5m87xx->irqlock); +} + +static void s5m8767_irq_sync_unlock(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + int i; + + for (i = 0; i < ARRAY_SIZE(s5m87xx->irq_masks_cur); i++) { + if (s5m87xx->irq_masks_cur[i] != s5m87xx->irq_masks_cache[i]) { + s5m87xx->irq_masks_cache[i] = s5m87xx->irq_masks_cur[i]; + s5m_reg_write(s5m87xx, S5M8767_REG_INT1M + i, + s5m87xx->irq_masks_cur[i]); + } + } + + mutex_unlock(&s5m87xx->irqlock); +} + +static void s5m8767_irq_unmask(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + struct s5m_irq_data *irq_data = irq_to_s5m8767_irq(s5m87xx, + data->irq); + + s5m87xx->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask; +} + +static void s5m8767_irq_mask(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + struct s5m_irq_data *irq_data = irq_to_s5m8767_irq(s5m87xx, + data->irq); + + s5m87xx->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask; +} + +static struct irq_chip s5m8767_irq_chip = { + .name = "s5m8767", + .irq_bus_lock = s5m8767_irq_lock, + .irq_bus_sync_unlock = s5m8767_irq_sync_unlock, + .irq_mask = s5m8767_irq_mask, + .irq_unmask = s5m8767_irq_unmask, +}; + +static inline struct s5m_irq_data * +irq_to_s5m8763_irq(struct s5m87xx_dev *s5m87xx, int irq) +{ + return &s5m8763_irqs[irq - s5m87xx->irq_base]; +} + +static void s5m8763_irq_lock(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + + mutex_lock(&s5m87xx->irqlock); +} + +static void s5m8763_irq_sync_unlock(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + int i; + + for (i = 0; i < ARRAY_SIZE(s5m87xx->irq_masks_cur); i++) { + if (s5m87xx->irq_masks_cur[i] != s5m87xx->irq_masks_cache[i]) { + s5m87xx->irq_masks_cache[i] = s5m87xx->irq_masks_cur[i]; + s5m_reg_write(s5m87xx, S5M8763_REG_IRQM1 + i, + s5m87xx->irq_masks_cur[i]); + } + } + + mutex_unlock(&s5m87xx->irqlock); +} + +static void s5m8763_irq_unmask(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + struct s5m_irq_data *irq_data = irq_to_s5m8763_irq(s5m87xx, + data->irq); + + s5m87xx->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask; +} + +static void s5m8763_irq_mask(struct irq_data *data) +{ + struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data); + struct s5m_irq_data *irq_data = irq_to_s5m8763_irq(s5m87xx, + data->irq); + + s5m87xx->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask; +} + +static struct irq_chip s5m8763_irq_chip = { + .name = "s5m8763", + .irq_bus_lock = s5m8763_irq_lock, + .irq_bus_sync_unlock = s5m8763_irq_sync_unlock, + .irq_mask = s5m8763_irq_mask, + .irq_unmask = s5m8763_irq_unmask, +}; + + +static irqreturn_t s5m8767_irq_thread(int irq, void *data) +{ + struct s5m87xx_dev *s5m87xx = data; + u8 irq_reg[NUM_IRQ_REGS-1]; + int ret; + int i; + + + ret = s5m_bulk_read(s5m87xx, S5M8767_REG_INT1, + NUM_IRQ_REGS - 1, irq_reg); + if (ret < 0) { + dev_err(s5m87xx->dev, "Failed to read interrupt register: %d\n", + ret); + return IRQ_NONE; + } + + for (i = 0; i < NUM_IRQ_REGS - 1; i++) + irq_reg[i] &= ~s5m87xx->irq_masks_cur[i]; + + for (i = 0; i < S5M8767_IRQ_NR; i++) { + if (irq_reg[s5m8767_irqs[i].reg - 1] & s5m8767_irqs[i].mask) + handle_nested_irq(s5m87xx->irq_base + i); + } + + return IRQ_HANDLED; +} + +static irqreturn_t s5m8763_irq_thread(int irq, void *data) +{ + struct s5m87xx_dev *s5m87xx = data; + u8 irq_reg[NUM_IRQ_REGS]; + int ret; + int i; + + ret = s5m_bulk_read(s5m87xx, S5M8763_REG_IRQ1, + NUM_IRQ_REGS, irq_reg); + if (ret < 0) { + dev_err(s5m87xx->dev, "Failed to read interrupt register: %d\n", + ret); + return IRQ_NONE; + } + + for (i = 0; i < NUM_IRQ_REGS; i++) + irq_reg[i] &= ~s5m87xx->irq_masks_cur[i]; + + for (i = 0; i < S5M8763_IRQ_NR; i++) { + if (irq_reg[s5m8763_irqs[i].reg - 1] & s5m8763_irqs[i].mask) + handle_nested_irq(s5m87xx->irq_base + i); + } + + return IRQ_HANDLED; +} + +int s5m_irq_resume(struct s5m87xx_dev *s5m87xx) +{ + if (s5m87xx->irq && s5m87xx->irq_base){ + switch (s5m87xx->device_type) { + case S5M8763X: + s5m8763_irq_thread(s5m87xx->irq_base, s5m87xx); + break; + case S5M8767X: + s5m8767_irq_thread(s5m87xx->irq_base, s5m87xx); + break; + default: + break; + + } + } + return 0; +} + +int s5m_irq_init(struct s5m87xx_dev *s5m87xx) +{ + int i; + int cur_irq; + int ret = 0; + int type = s5m87xx->device_type; + + if (!s5m87xx->irq) { + dev_warn(s5m87xx->dev, + "No interrupt specified, no interrupts\n"); + s5m87xx->irq_base = 0; + return 0; + } + + if (!s5m87xx->irq_base) { + dev_err(s5m87xx->dev, + "No interrupt base specified, no interrupts\n"); + return 0; + } + + mutex_init(&s5m87xx->irqlock); + + switch (type) { + case S5M8763X: + for (i = 0; i < NUM_IRQ_REGS; i++) { + s5m87xx->irq_masks_cur[i] = 0xff; + s5m87xx->irq_masks_cache[i] = 0xff; + s5m_reg_write(s5m87xx, S5M8763_REG_IRQM1 + i, + 0xff); + } + + s5m_reg_write(s5m87xx, S5M8763_REG_STATUSM1, 0xff); + s5m_reg_write(s5m87xx, S5M8763_REG_STATUSM2, 0xff); + + for (i = 0; i < S5M8763_IRQ_NR; i++) { + cur_irq = i + s5m87xx->irq_base; + irq_set_chip_data(cur_irq, s5m87xx); + irq_set_chip_and_handler(cur_irq, &s5m8763_irq_chip, + handle_edge_irq); + irq_set_nested_thread(cur_irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + irq_set_noprobe(cur_irq); +#endif + } + + ret = request_threaded_irq(s5m87xx->irq, NULL, + s5m8763_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "s5m87xx-irq", s5m87xx); + if (ret) { + dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n", + s5m87xx->irq, ret); + return ret; + } + break; + case S5M8767X: + for (i = 0; i < NUM_IRQ_REGS - 1; i++) { + s5m87xx->irq_masks_cur[i] = 0xff; + s5m87xx->irq_masks_cache[i] = 0xff; + s5m_reg_write(s5m87xx, S5M8767_REG_INT1M + i, + 0xff); + } + for (i = 0; i < S5M8767_IRQ_NR; i++) { + cur_irq = i + s5m87xx->irq_base; + irq_set_chip_data(cur_irq, s5m87xx); + if (ret) { + dev_err(s5m87xx->dev, + "Failed to irq_set_chip_data %d: %d\n", + s5m87xx->irq, ret); + return ret; + } + + irq_set_chip_and_handler(cur_irq, &s5m8767_irq_chip, + handle_edge_irq); + irq_set_nested_thread(cur_irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + irq_set_noprobe(cur_irq); +#endif + } + + ret = request_threaded_irq(s5m87xx->irq, NULL, + s5m8767_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "s5m87xx-irq", s5m87xx); + if (ret) { + dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n", + s5m87xx->irq, ret); + return ret; + } + break; + default: + break; + } + + if (!s5m87xx->ono) + return 0; + + switch (type) { + case S5M8763X: + ret = request_threaded_irq(s5m87xx->ono, NULL, + s5m8763_irq_thread, + IRQF_TRIGGER_FALLING | + IRQF_TRIGGER_RISING | + IRQF_ONESHOT, "s5m87xx-ono", + s5m87xx); + break; + case S5M8767X: + ret = request_threaded_irq(s5m87xx->ono, NULL, + s5m8767_irq_thread, + IRQF_TRIGGER_FALLING | + IRQF_TRIGGER_RISING | + IRQF_ONESHOT, "s5m87xx-ono", s5m87xx); + break; + default: + break; + } + + if (ret) + dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n", + s5m87xx->ono, ret); + + return 0; +} + +void s5m_irq_exit(struct s5m87xx_dev *s5m87xx) +{ + if (s5m87xx->ono) + free_irq(s5m87xx->ono, s5m87xx); + + if (s5m87xx->irq) + free_irq(s5m87xx->irq, s5m87xx); +} diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index df3702c1756..f4d86117f44 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1720,7 +1720,7 @@ static int sm501_plat_remove(struct platform_device *dev) return 0; } -static struct pci_device_id sm501_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(sm501_pci_tbl) = { { 0x126f, 0x0501, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { 0, }, }; diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c new file mode 100644 index 00000000000..373f423b118 --- /dev/null +++ b/drivers/mfd/stmpe-i2c.c @@ -0,0 +1,109 @@ +/* + * ST Microelectronics MFD: stmpe's i2c client specific driver + * + * Copyright (C) ST-Ericsson SA 2010 + * Copyright (C) ST Microelectronics SA 2011 + * + * License Terms: GNU General Public License, version 2 + * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson + * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics + */ + +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include "stmpe.h" + +static int i2c_reg_read(struct stmpe *stmpe, u8 reg) +{ + struct i2c_client *i2c = stmpe->client; + + return i2c_smbus_read_byte_data(i2c, reg); +} + +static int i2c_reg_write(struct stmpe *stmpe, u8 reg, u8 val) +{ + struct i2c_client *i2c = stmpe->client; + + return i2c_smbus_write_byte_data(i2c, reg, val); +} + +static int i2c_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values) +{ + struct i2c_client *i2c = stmpe->client; + + return i2c_smbus_read_i2c_block_data(i2c, reg, length, values); +} + +static int i2c_block_write(struct stmpe *stmpe, u8 reg, u8 length, + const u8 *values) +{ + struct i2c_client *i2c = stmpe->client; + + return i2c_smbus_write_i2c_block_data(i2c, reg, length, values); +} + +static struct stmpe_client_info i2c_ci = { + .read_byte = i2c_reg_read, + .write_byte = i2c_reg_write, + .read_block = i2c_block_read, + .write_block = i2c_block_write, +}; + +static int __devinit +stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) +{ + i2c_ci.data = (void *)id; + i2c_ci.irq = i2c->irq; + i2c_ci.client = i2c; + i2c_ci.dev = &i2c->dev; + + return stmpe_probe(&i2c_ci, id->driver_data); +} + +static int __devexit stmpe_i2c_remove(struct i2c_client *i2c) +{ + struct stmpe *stmpe = dev_get_drvdata(&i2c->dev); + + return stmpe_remove(stmpe); +} + +static const struct i2c_device_id stmpe_i2c_id[] = { + { "stmpe610", STMPE610 }, + { "stmpe801", STMPE801 }, + { "stmpe811", STMPE811 }, + { "stmpe1601", STMPE1601 }, + { "stmpe2401", STMPE2401 }, + { "stmpe2403", STMPE2403 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, stmpe_id); + +static struct i2c_driver stmpe_i2c_driver = { + .driver.name = "stmpe-i2c", + .driver.owner = THIS_MODULE, +#ifdef CONFIG_PM + .driver.pm = &stmpe_dev_pm_ops, +#endif + .probe = stmpe_i2c_probe, + .remove = __devexit_p(stmpe_i2c_remove), + .id_table = stmpe_i2c_id, +}; + +static int __init stmpe_init(void) +{ + return i2c_add_driver(&stmpe_i2c_driver); +} +subsys_initcall(stmpe_init); + +static void __exit stmpe_exit(void) +{ + i2c_del_driver(&stmpe_i2c_driver); +} +module_exit(stmpe_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("STMPE MFD I2C Interface Driver"); +MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>"); diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c new file mode 100644 index 00000000000..b58c43c7ea9 --- /dev/null +++ b/drivers/mfd/stmpe-spi.c @@ -0,0 +1,150 @@ +/* + * ST Microelectronics MFD: stmpe's spi client specific driver + * + * Copyright (C) ST Microelectronics SA 2011 + * + * License Terms: GNU General Public License, version 2 + * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics + */ + +#include <linux/spi/spi.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include "stmpe.h" + +#define READ_CMD (1 << 7) + +static int spi_reg_read(struct stmpe *stmpe, u8 reg) +{ + struct spi_device *spi = stmpe->client; + int status = spi_w8r16(spi, reg | READ_CMD); + + return (status < 0) ? status : status >> 8; +} + +static int spi_reg_write(struct stmpe *stmpe, u8 reg, u8 val) +{ + struct spi_device *spi = stmpe->client; + u16 cmd = (val << 8) | reg; + + return spi_write(spi, (const u8 *)&cmd, 2); +} + +static int spi_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values) +{ + int ret, i; + + for (i = 0; i < length; i++) { + ret = spi_reg_read(stmpe, reg + i); + if (ret < 0) + return ret; + *(values + i) = ret; + } + + return 0; +} + +static int spi_block_write(struct stmpe *stmpe, u8 reg, u8 length, + const u8 *values) +{ + int ret = 0, i; + + for (i = length; i > 0; i--, reg++) { + ret = spi_reg_write(stmpe, reg, *(values + i - 1)); + if (ret < 0) + return ret; + } + + return ret; +} + +static void spi_init(struct stmpe *stmpe) +{ + struct spi_device *spi = stmpe->client; + + spi->bits_per_word = 8; + + /* This register is only present for stmpe811 */ + if (stmpe->variant->id_val == 0x0811) + spi_reg_write(stmpe, STMPE811_REG_SPI_CFG, spi->mode); + + if (spi_setup(spi) < 0) + dev_dbg(&spi->dev, "spi_setup failed\n"); +} + +static struct stmpe_client_info spi_ci = { + .read_byte = spi_reg_read, + .write_byte = spi_reg_write, + .read_block = spi_block_read, + .write_block = spi_block_write, + .init = spi_init, +}; + +static int __devinit +stmpe_spi_probe(struct spi_device *spi) +{ + const struct spi_device_id *id = spi_get_device_id(spi); + + /* don't exceed max specified rate - 1MHz - Limitation of STMPE */ + if (spi->max_speed_hz > 1000000) { + dev_dbg(&spi->dev, "f(sample) %d KHz?\n", + (spi->max_speed_hz/1000)); + return -EINVAL; + } + + spi_ci.irq = spi->irq; + spi_ci.client = spi; + spi_ci.dev = &spi->dev; + + return stmpe_probe(&spi_ci, id->driver_data); +} + +static int __devexit stmpe_spi_remove(struct spi_device *spi) +{ + struct stmpe *stmpe = dev_get_drvdata(&spi->dev); + + return stmpe_remove(stmpe); +} + +static const struct spi_device_id stmpe_spi_id[] = { + { "stmpe610", STMPE610 }, + { "stmpe801", STMPE801 }, + { "stmpe811", STMPE811 }, + { "stmpe1601", STMPE1601 }, + { "stmpe2401", STMPE2401 }, + { "stmpe2403", STMPE2403 }, + { } +}; +MODULE_DEVICE_TABLE(spi, stmpe_id); + +static struct spi_driver stmpe_spi_driver = { + .driver = { + .name = "stmpe-spi", + .bus = &spi_bus_type, + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &stmpe_dev_pm_ops, +#endif + }, + .probe = stmpe_spi_probe, + .remove = __devexit_p(stmpe_spi_remove), + .id_table = stmpe_spi_id, +}; + +static int __init stmpe_init(void) +{ + return spi_register_driver(&stmpe_spi_driver); +} +subsys_initcall(stmpe_init); + +static void __exit stmpe_exit(void) +{ + spi_unregister_driver(&stmpe_spi_driver); +} +module_exit(stmpe_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("STMPE MFD SPI Interface Driver"); +MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>"); diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 2963689cf45..e07947e56b2 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -1,18 +1,20 @@ /* + * ST Microelectronics MFD: stmpe's driver + * * Copyright (C) ST-Ericsson SA 2010 * * License Terms: GNU General Public License, version 2 * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson */ +#include <linux/gpio.h> +#include <linux/export.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/pm.h> #include <linux/slab.h> -#include <linux/i2c.h> #include <linux/mfd/core.h> -#include <linux/mfd/stmpe.h> #include "stmpe.h" static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks) @@ -29,10 +31,9 @@ static int __stmpe_reg_read(struct stmpe *stmpe, u8 reg) { int ret; - ret = i2c_smbus_read_byte_data(stmpe->i2c, reg); + ret = stmpe->ci->read_byte(stmpe, reg); if (ret < 0) - dev_err(stmpe->dev, "failed to read reg %#x: %d\n", - reg, ret); + dev_err(stmpe->dev, "failed to read reg %#x: %d\n", reg, ret); dev_vdbg(stmpe->dev, "rd: reg %#x => data %#x\n", reg, ret); @@ -45,10 +46,9 @@ static int __stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 val) dev_vdbg(stmpe->dev, "wr: reg %#x <= %#x\n", reg, val); - ret = i2c_smbus_write_byte_data(stmpe->i2c, reg, val); + ret = stmpe->ci->write_byte(stmpe, reg, val); if (ret < 0) - dev_err(stmpe->dev, "failed to write reg %#x: %d\n", - reg, ret); + dev_err(stmpe->dev, "failed to write reg %#x: %d\n", reg, ret); return ret; } @@ -72,10 +72,9 @@ static int __stmpe_block_read(struct stmpe *stmpe, u8 reg, u8 length, { int ret; - ret = i2c_smbus_read_i2c_block_data(stmpe->i2c, reg, length, values); + ret = stmpe->ci->read_block(stmpe, reg, length, values); if (ret < 0) - dev_err(stmpe->dev, "failed to read regs %#x: %d\n", - reg, ret); + dev_err(stmpe->dev, "failed to read regs %#x: %d\n", reg, ret); dev_vdbg(stmpe->dev, "rd: reg %#x (%d) => ret %#x\n", reg, length, ret); stmpe_dump_bytes("stmpe rd: ", values, length); @@ -91,11 +90,9 @@ static int __stmpe_block_write(struct stmpe *stmpe, u8 reg, u8 length, dev_vdbg(stmpe->dev, "wr: regs %#x (%d)\n", reg, length); stmpe_dump_bytes("stmpe wr: ", values, length); - ret = i2c_smbus_write_i2c_block_data(stmpe->i2c, reg, length, - values); + ret = stmpe->ci->write_block(stmpe, reg, length, values); if (ret < 0) - dev_err(stmpe->dev, "failed to write regs %#x: %d\n", - reg, ret); + dev_err(stmpe->dev, "failed to write regs %#x: %d\n", reg, ret); return ret; } @@ -245,12 +242,14 @@ int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins, enum stmpe_block block) u8 regaddr = stmpe->regs[STMPE_IDX_GPAFR_U_MSB]; int af_bits = variant->af_bits; int numregs = DIV_ROUND_UP(stmpe->num_gpios * af_bits, 8); - int afperreg = 8 / af_bits; int mask = (1 << af_bits) - 1; u8 regs[numregs]; - int af; - int ret; + int af, afperreg, ret; + + if (!variant->get_altfunc) + return 0; + afperreg = 8 / af_bits; mutex_lock(&stmpe->lock); ret = __stmpe_enable(stmpe, STMPE_BLOCK_GPIO); @@ -325,7 +324,51 @@ static struct mfd_cell stmpe_keypad_cell = { }; /* - * Touchscreen (STMPE811) + * STMPE801 + */ +static const u8 stmpe801_regs[] = { + [STMPE_IDX_CHIP_ID] = STMPE801_REG_CHIP_ID, + [STMPE_IDX_ICR_LSB] = STMPE801_REG_SYS_CTRL, + [STMPE_IDX_GPMR_LSB] = STMPE801_REG_GPIO_MP_STA, + [STMPE_IDX_GPSR_LSB] = STMPE801_REG_GPIO_SET_PIN, + [STMPE_IDX_GPCR_LSB] = STMPE801_REG_GPIO_SET_PIN, + [STMPE_IDX_GPDR_LSB] = STMPE801_REG_GPIO_DIR, + [STMPE_IDX_IEGPIOR_LSB] = STMPE801_REG_GPIO_INT_EN, + [STMPE_IDX_ISGPIOR_MSB] = STMPE801_REG_GPIO_INT_STA, + +}; + +static struct stmpe_variant_block stmpe801_blocks[] = { + { + .cell = &stmpe_gpio_cell, + .irq = 0, + .block = STMPE_BLOCK_GPIO, + }, +}; + +static int stmpe801_enable(struct stmpe *stmpe, unsigned int blocks, + bool enable) +{ + if (blocks & STMPE_BLOCK_GPIO) + return 0; + else + return -EINVAL; +} + +static struct stmpe_variant_info stmpe801 = { + .name = "stmpe801", + .id_val = STMPE801_ID, + .id_mask = 0xffff, + .num_gpios = 8, + .regs = stmpe801_regs, + .blocks = stmpe801_blocks, + .num_blocks = ARRAY_SIZE(stmpe801_blocks), + .num_irqs = STMPE801_NR_INTERNAL_IRQS, + .enable = stmpe801_enable, +}; + +/* + * Touchscreen (STMPE811 or STMPE610) */ static struct resource stmpe_ts_resources[] = { @@ -350,7 +393,7 @@ static struct mfd_cell stmpe_ts_cell = { }; /* - * STMPE811 + * STMPE811 or STMPE610 */ static const u8 stmpe811_regs[] = { @@ -421,6 +464,21 @@ static struct stmpe_variant_info stmpe811 = { .get_altfunc = stmpe811_get_altfunc, }; +/* Similar to 811, except number of gpios */ +static struct stmpe_variant_info stmpe610 = { + .name = "stmpe610", + .id_val = 0x0811, + .id_mask = 0xffff, + .num_gpios = 6, + .af_bits = 1, + .regs = stmpe811_regs, + .blocks = stmpe811_blocks, + .num_blocks = ARRAY_SIZE(stmpe811_blocks), + .num_irqs = STMPE811_NR_INTERNAL_IRQS, + .enable = stmpe811_enable, + .get_altfunc = stmpe811_get_altfunc, +}; + /* * STMPE1601 */ @@ -655,6 +713,8 @@ static struct stmpe_variant_info stmpe2403 = { }; static struct stmpe_variant_info *stmpe_variant_info[] = { + [STMPE610] = &stmpe610, + [STMPE801] = &stmpe801, [STMPE811] = &stmpe811, [STMPE1601] = &stmpe1601, [STMPE2401] = &stmpe2401, @@ -671,6 +731,11 @@ static irqreturn_t stmpe_irq(int irq, void *data) int ret; int i; + if (variant->id_val == STMPE801_ID) { + handle_nested_irq(stmpe->irq_base); + return IRQ_HANDLED; + } + ret = stmpe_block_read(stmpe, israddr, num, isr); if (ret < 0) return IRQ_NONE; @@ -757,14 +822,17 @@ static struct irq_chip stmpe_irq_chip = { static int __devinit stmpe_irq_init(struct stmpe *stmpe) { + struct irq_chip *chip = NULL; int num_irqs = stmpe->variant->num_irqs; int base = stmpe->irq_base; int irq; + if (stmpe->variant->id_val != STMPE801_ID) + chip = &stmpe_irq_chip; + for (irq = base; irq < base + num_irqs; irq++) { irq_set_chip_data(irq, stmpe); - irq_set_chip_and_handler(irq, &stmpe_irq_chip, - handle_edge_irq); + irq_set_chip_and_handler(irq, chip, handle_edge_irq); irq_set_nested_thread(irq, 1); #ifdef CONFIG_ARM set_irq_flags(irq, IRQF_VALID); @@ -796,7 +864,7 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe) unsigned int irq_trigger = stmpe->pdata->irq_trigger; int autosleep_timeout = stmpe->pdata->autosleep_timeout; struct stmpe_variant_info *variant = stmpe->variant; - u8 icr = STMPE_ICR_LSB_GIM; + u8 icr; unsigned int id; u8 data[2]; int ret; @@ -819,16 +887,32 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe) if (ret) return ret; - if (irq_trigger == IRQF_TRIGGER_FALLING || - irq_trigger == IRQF_TRIGGER_RISING) - icr |= STMPE_ICR_LSB_EDGE; + if (id == STMPE801_ID) + icr = STMPE801_REG_SYS_CTRL_INT_EN; + else + icr = STMPE_ICR_LSB_GIM; + + /* STMPE801 doesn't support Edge interrupts */ + if (id != STMPE801_ID) { + if (irq_trigger == IRQF_TRIGGER_FALLING || + irq_trigger == IRQF_TRIGGER_RISING) + icr |= STMPE_ICR_LSB_EDGE; + } if (irq_trigger == IRQF_TRIGGER_RISING || - irq_trigger == IRQF_TRIGGER_HIGH) - icr |= STMPE_ICR_LSB_HIGH; + irq_trigger == IRQF_TRIGGER_HIGH) { + if (id == STMPE801_ID) + icr |= STMPE801_REG_SYS_CTRL_INT_HI; + else + icr |= STMPE_ICR_LSB_HIGH; + } - if (stmpe->pdata->irq_invert_polarity) - icr ^= STMPE_ICR_LSB_HIGH; + if (stmpe->pdata->irq_invert_polarity) { + if (id == STMPE801_ID) + icr ^= STMPE801_REG_SYS_CTRL_INT_HI; + else + icr ^= STMPE_ICR_LSB_HIGH; + } if (stmpe->pdata->autosleep) { ret = stmpe_autosleep(stmpe, autosleep_timeout); @@ -873,32 +957,10 @@ static int __devinit stmpe_devices_init(struct stmpe *stmpe) return ret; } -#ifdef CONFIG_PM -static int stmpe_suspend(struct device *dev) -{ - struct i2c_client *i2c = to_i2c_client(dev); - - if (device_may_wakeup(&i2c->dev)) - enable_irq_wake(i2c->irq); - - return 0; -} - -static int stmpe_resume(struct device *dev) -{ - struct i2c_client *i2c = to_i2c_client(dev); - - if (device_may_wakeup(&i2c->dev)) - disable_irq_wake(i2c->irq); - - return 0; -} -#endif - -static int __devinit stmpe_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +/* Called from client specific probe routines */ +int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum) { - struct stmpe_platform_data *pdata = i2c->dev.platform_data; + struct stmpe_platform_data *pdata = dev_get_platdata(ci->dev); struct stmpe *stmpe; int ret; @@ -912,30 +974,43 @@ static int __devinit stmpe_probe(struct i2c_client *i2c, mutex_init(&stmpe->irq_lock); mutex_init(&stmpe->lock); - stmpe->dev = &i2c->dev; - stmpe->i2c = i2c; - + stmpe->dev = ci->dev; + stmpe->client = ci->client; stmpe->pdata = pdata; stmpe->irq_base = pdata->irq_base; - - stmpe->partnum = id->driver_data; - stmpe->variant = stmpe_variant_info[stmpe->partnum]; + stmpe->ci = ci; + stmpe->partnum = partnum; + stmpe->variant = stmpe_variant_info[partnum]; stmpe->regs = stmpe->variant->regs; stmpe->num_gpios = stmpe->variant->num_gpios; + dev_set_drvdata(stmpe->dev, stmpe); - i2c_set_clientdata(i2c, stmpe); + if (ci->init) + ci->init(stmpe); + + if (pdata->irq_over_gpio) { + ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe"); + if (ret) { + dev_err(stmpe->dev, "failed to request IRQ GPIO: %d\n", + ret); + goto out_free; + } + + stmpe->irq = gpio_to_irq(pdata->irq_gpio); + } else { + stmpe->irq = ci->irq; + } ret = stmpe_chip_init(stmpe); if (ret) - goto out_free; + goto free_gpio; ret = stmpe_irq_init(stmpe); if (ret) - goto out_free; + goto free_gpio; - ret = request_threaded_irq(stmpe->i2c->irq, NULL, stmpe_irq, - pdata->irq_trigger | IRQF_ONESHOT, - "stmpe", stmpe); + ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq, + pdata->irq_trigger | IRQF_ONESHOT, "stmpe", stmpe); if (ret) { dev_err(stmpe->dev, "failed to request IRQ: %d\n", ret); goto out_removeirq; @@ -951,67 +1026,55 @@ static int __devinit stmpe_probe(struct i2c_client *i2c, out_removedevs: mfd_remove_devices(stmpe->dev); - free_irq(stmpe->i2c->irq, stmpe); + free_irq(stmpe->irq, stmpe); out_removeirq: stmpe_irq_remove(stmpe); +free_gpio: + if (pdata->irq_over_gpio) + gpio_free(pdata->irq_gpio); out_free: kfree(stmpe); return ret; } -static int __devexit stmpe_remove(struct i2c_client *client) +int stmpe_remove(struct stmpe *stmpe) { - struct stmpe *stmpe = i2c_get_clientdata(client); - mfd_remove_devices(stmpe->dev); - free_irq(stmpe->i2c->irq, stmpe); + free_irq(stmpe->irq, stmpe); stmpe_irq_remove(stmpe); + if (stmpe->pdata->irq_over_gpio) + gpio_free(stmpe->pdata->irq_gpio); + kfree(stmpe); return 0; } -static const struct i2c_device_id stmpe_id[] = { - { "stmpe811", STMPE811 }, - { "stmpe1601", STMPE1601 }, - { "stmpe2401", STMPE2401 }, - { "stmpe2403", STMPE2403 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, stmpe_id); - #ifdef CONFIG_PM -static const struct dev_pm_ops stmpe_dev_pm_ops = { - .suspend = stmpe_suspend, - .resume = stmpe_resume, -}; -#endif +static int stmpe_suspend(struct device *dev) +{ + struct stmpe *stmpe = dev_get_drvdata(dev); -static struct i2c_driver stmpe_driver = { - .driver.name = "stmpe", - .driver.owner = THIS_MODULE, -#ifdef CONFIG_PM - .driver.pm = &stmpe_dev_pm_ops, -#endif - .probe = stmpe_probe, - .remove = __devexit_p(stmpe_remove), - .id_table = stmpe_id, -}; + if (device_may_wakeup(dev)) + enable_irq_wake(stmpe->irq); -static int __init stmpe_init(void) -{ - return i2c_add_driver(&stmpe_driver); + return 0; } -subsys_initcall(stmpe_init); -static void __exit stmpe_exit(void) +static int stmpe_resume(struct device *dev) { - i2c_del_driver(&stmpe_driver); + struct stmpe *stmpe = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(stmpe->irq); + + return 0; } -module_exit(stmpe_exit); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("STMPE MFD core driver"); -MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>"); +const struct dev_pm_ops stmpe_dev_pm_ops = { + .suspend = stmpe_suspend, + .resume = stmpe_resume, +}; +#endif diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h index e4ee3895658..7b8e13f5b76 100644 --- a/drivers/mfd/stmpe.h +++ b/drivers/mfd/stmpe.h @@ -8,6 +8,14 @@ #ifndef __STMPE_H #define __STMPE_H +#include <linux/device.h> +#include <linux/mfd/core.h> +#include <linux/mfd/stmpe.h> +#include <linux/printk.h> +#include <linux/types.h> + +extern const struct dev_pm_ops stmpe_dev_pm_ops; + #ifdef STMPE_DUMP_BYTES static inline void stmpe_dump_bytes(const char *str, const void *buf, size_t len) @@ -67,11 +75,55 @@ struct stmpe_variant_info { int (*enable_autosleep)(struct stmpe *stmpe, int autosleep_timeout); }; +/** + * struct stmpe_client_info - i2c or spi specific routines/info + * @data: client specific data + * @read_byte: read single byte + * @write_byte: write single byte + * @read_block: read block or multiple bytes + * @write_block: write block or multiple bytes + * @init: client init routine, called during probe + */ +struct stmpe_client_info { + void *data; + int irq; + void *client; + struct device *dev; + int (*read_byte)(struct stmpe *stmpe, u8 reg); + int (*write_byte)(struct stmpe *stmpe, u8 reg, u8 val); + int (*read_block)(struct stmpe *stmpe, u8 reg, u8 len, u8 *values); + int (*write_block)(struct stmpe *stmpe, u8 reg, u8 len, + const u8 *values); + void (*init)(struct stmpe *stmpe); +}; + +int stmpe_probe(struct stmpe_client_info *ci, int partnum); +int stmpe_remove(struct stmpe *stmpe); + #define STMPE_ICR_LSB_HIGH (1 << 2) #define STMPE_ICR_LSB_EDGE (1 << 1) #define STMPE_ICR_LSB_GIM (1 << 0) /* + * STMPE801 + */ +#define STMPE801_ID 0x0108 +#define STMPE801_NR_INTERNAL_IRQS 1 + +#define STMPE801_REG_CHIP_ID 0x00 +#define STMPE801_REG_VERSION_ID 0x02 +#define STMPE801_REG_SYS_CTRL 0x04 +#define STMPE801_REG_GPIO_INT_EN 0x08 +#define STMPE801_REG_GPIO_INT_STA 0x09 +#define STMPE801_REG_GPIO_MP_STA 0x10 +#define STMPE801_REG_GPIO_SET_PIN 0x11 +#define STMPE801_REG_GPIO_DIR 0x12 + +#define STMPE801_REG_SYS_CTRL_RESET (1 << 7) +#define STMPE801_REG_SYS_CTRL_INT_EN (1 << 2) +#define STMPE801_REG_SYS_CTRL_INT_HI (1 << 0) + +/* * STMPE811 */ @@ -87,6 +139,7 @@ struct stmpe_variant_info { #define STMPE811_REG_CHIP_ID 0x00 #define STMPE811_REG_SYS_CTRL2 0x04 +#define STMPE811_REG_SPI_CFG 0x08 #define STMPE811_REG_INT_CTRL 0x09 #define STMPE811_REG_INT_EN 0x0A #define STMPE811_REG_INT_STA 0x0B diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 91ad21ef772..2d9e8799e73 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -442,21 +442,7 @@ static struct platform_driver t7l66xb_platform_driver = { /*--------------------------------------------------------------------------*/ -static int __init t7l66xb_init(void) -{ - int retval = 0; - - retval = platform_driver_register(&t7l66xb_platform_driver); - return retval; -} - -static void __exit t7l66xb_exit(void) -{ - platform_driver_unregister(&t7l66xb_platform_driver); -} - -module_init(t7l66xb_init); -module_exit(t7l66xb_exit); +module_platform_driver(t7l66xb_platform_driver); MODULE_DESCRIPTION("Toshiba T7L66XB core driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c index 71bc835324d..d20a284ad4b 100644 --- a/drivers/mfd/tc6387xb.c +++ b/drivers/mfd/tc6387xb.c @@ -234,19 +234,7 @@ static struct platform_driver tc6387xb_platform_driver = { .resume = tc6387xb_resume, }; - -static int __init tc6387xb_init(void) -{ - return platform_driver_register(&tc6387xb_platform_driver); -} - -static void __exit tc6387xb_exit(void) -{ - platform_driver_unregister(&tc6387xb_platform_driver); -} - -module_init(tc6387xb_init); -module_exit(tc6387xb_exit); +module_platform_driver(tc6387xb_platform_driver); MODULE_DESCRIPTION("Toshiba TC6387XB core driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/ti-ssp.c b/drivers/mfd/ti-ssp.c index af9ab0e5ca6..4fb0e6c8e8f 100644 --- a/drivers/mfd/ti-ssp.c +++ b/drivers/mfd/ti-ssp.c @@ -458,17 +458,7 @@ static struct platform_driver ti_ssp_driver = { } }; -static int __init ti_ssp_init(void) -{ - return platform_driver_register(&ti_ssp_driver); -} -module_init(ti_ssp_init); - -static void __exit ti_ssp_exit(void) -{ - platform_driver_unregister(&ti_ssp_driver); -} -module_exit(ti_ssp_exit); +module_platform_driver(ti_ssp_driver); MODULE_DESCRIPTION("Sequencer Serial Port (SSP) Driver"); MODULE_AUTHOR("Cyril Chemparathy"); diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c index 02d65692ceb..0ba26fb12cf 100644 --- a/drivers/mfd/timberdale.c +++ b/drivers/mfd/timberdale.c @@ -857,7 +857,7 @@ static void __devexit timb_remove(struct pci_dev *dev) kfree(priv); } -static struct pci_device_id timberdale_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(timberdale_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_TIMB, PCI_DEVICE_ID_TIMB) }, { 0 } }; diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c index a56be931551..95c0d7978be 100644 --- a/drivers/mfd/tps65910-irq.c +++ b/drivers/mfd/tps65910-irq.c @@ -215,6 +215,7 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, int tps65910_irq_exit(struct tps65910 *tps65910) { - free_irq(tps65910->chip_irq, tps65910); + if (tps65910->chip_irq) + free_irq(tps65910->chip_irq, tps65910); return 0; } diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index c1da84bc157..01cf5012a08 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -172,15 +172,12 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base); - ret = tps65910_irq_init(tps65910, init_data->irq, init_data); - if (ret < 0) - goto err; + tps65910_irq_init(tps65910, init_data->irq, init_data); kfree(init_data); return ret; err: - mfd_remove_devices(tps65910->dev); kfree(tps65910); kfree(init_data); return ret; @@ -190,8 +187,8 @@ static int tps65910_i2c_remove(struct i2c_client *i2c) { struct tps65910 *tps65910 = i2c_get_clientdata(i2c); - mfd_remove_devices(tps65910->dev); tps65910_irq_exit(tps65910); + mfd_remove_devices(tps65910->dev); kfree(tps65910); return 0; diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c index 6d71e0d2574..27d3302d56b 100644 --- a/drivers/mfd/tps65912-spi.c +++ b/drivers/mfd/tps65912-spi.c @@ -111,7 +111,6 @@ static int __devexit tps65912_spi_remove(struct spi_device *spi) static struct spi_driver tps65912_spi_driver = { .driver = { .name = "tps65912", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, .probe = tps65912_spi_probe, diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 61e70cfaa77..e04e04ddc15 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -34,6 +34,11 @@ #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/device.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/irqdomain.h> #include <linux/regulator/machine.h> @@ -144,6 +149,9 @@ #define TWL_MODULE_LAST TWL4030_MODULE_LAST +#define TWL4030_NR_IRQS 8 +#define TWL6030_NR_IRQS 20 + /* Base Address defns for twl4030_map[] */ /* subchip/slave 0 - USB ID */ @@ -255,6 +263,7 @@ struct twl_client { static struct twl_client twl_modules[TWL_NUM_SLAVES]; +static struct irq_domain domain; /* mapping the module id to slave id and base address */ struct twl_mapping { @@ -1183,14 +1192,48 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) int status; unsigned i; struct twl4030_platform_data *pdata = client->dev.platform_data; + struct device_node *node = client->dev.of_node; u8 temp; int ret = 0; + int nr_irqs = TWL4030_NR_IRQS; + + if ((id->driver_data) & TWL6030_CLASS) + nr_irqs = TWL6030_NR_IRQS; + + if (node && !pdata) { + /* + * XXX: Temporary pdata until the information is correctly + * retrieved by every TWL modules from DT. + */ + pdata = devm_kzalloc(&client->dev, + sizeof(struct twl4030_platform_data), + GFP_KERNEL); + if (!pdata) + return -ENOMEM; + } if (!pdata) { dev_dbg(&client->dev, "no platform data?\n"); return -EINVAL; } + status = irq_alloc_descs(-1, pdata->irq_base, nr_irqs, 0); + if (IS_ERR_VALUE(status)) { + dev_err(&client->dev, "Fail to allocate IRQ descs\n"); + return status; + } + + pdata->irq_base = status; + pdata->irq_end = pdata->irq_base + nr_irqs; + + domain.irq_base = pdata->irq_base; + domain.nr_irq = nr_irqs; +#ifdef CONFIG_OF_IRQ + domain.of_node = of_node_get(node); + domain.ops = &irq_domain_simple_ops; +#endif + irq_domain_add(&domain); + if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) { dev_dbg(&client->dev, "can't talk I2C?\n"); return -EIO; @@ -1270,7 +1313,13 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) twl_i2c_write_u8(TWL4030_MODULE_INTBR, temp, REG_GPPUPDCTR1); } - status = add_children(pdata, id->driver_data); +#ifdef CONFIG_OF_DEVICE + if (node) + status = of_platform_populate(node, NULL, NULL, &client->dev); + else +#endif + status = add_children(pdata, id->driver_data); + fail: if (status < 0) twl_remove(client); diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index ae51ab5d0e5..838ce4eb444 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -261,17 +261,7 @@ static struct platform_driver twl4030_audio_driver = { }, }; -static int __devinit twl4030_audio_init(void) -{ - return platform_driver_register(&twl4030_audio_driver); -} -module_init(twl4030_audio_init); - -static void __devexit twl4030_audio_exit(void) -{ - platform_driver_unregister(&twl4030_audio_driver); -} -module_exit(twl4030_audio_exit); +module_platform_driver(twl4030_audio_driver); MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 29f11e0765f..b69bb517b10 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -492,7 +492,7 @@ static void twl4030_sih_bus_sync_unlock(struct irq_data *data) u8 bytes[4]; } imr; - /* byte[0] gets overwriten as we write ... */ + /* byte[0] gets overwritten as we write ... */ imr.word = cpu_to_le32(agent->imr << 8); agent->imr_change_pending = false; @@ -667,6 +667,7 @@ int twl4030_sih_setup(int module) irq_set_chip_data(irq, agent); irq_set_chip_and_handler(irq, &twl4030_sih_irq_chip, handle_edge_irq); + irq_set_nested_thread(irq, 1); activate_irq(irq); } diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index 834f824d3c1..456ecb5ac4f 100644 --- a/drivers/mfd/twl4030-madc.c +++ b/drivers/mfd/twl4030-madc.c @@ -807,19 +807,7 @@ static struct platform_driver twl4030_madc_driver = { }, }; -static int __init twl4030_madc_init(void) -{ - return platform_driver_register(&twl4030_madc_driver); -} - -module_init(twl4030_madc_init); - -static void __exit twl4030_madc_exit(void) -{ - platform_driver_unregister(&twl4030_madc_driver); -} - -module_exit(twl4030_madc_exit); +module_platform_driver(twl4030_madc_driver); MODULE_DESCRIPTION("TWL4030 ADC driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index a764676f092..d905f517115 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -34,7 +34,8 @@ static u8 twl4030_start_script_address = 0x2b; #define PWR_P1_SW_EVENTS 0x10 -#define PWR_DEVOFF (1<<0) +#define PWR_DEVOFF (1 << 0) +#define SEQ_OFFSYNC (1 << 0) #define PHY_TO_OFF_PM_MASTER(p) (p - 0x36) #define PHY_TO_OFF_PM_RECEIVER(p) (p - 0x5b) @@ -511,12 +512,27 @@ int twl4030_remove_script(u8 flags) return err; } +/* + * In master mode, start the power off sequence. + * After a successful execution, TWL shuts down the power to the SoC + * and all peripherals connected to it. + */ +void twl4030_power_off(void) +{ + int err; + + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, PWR_DEVOFF, + TWL4030_PM_MASTER_P1_SW_EVENTS); + if (err) + pr_err("TWL4030 Unable to power off\n"); +} + void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts) { int err = 0; int i; struct twl4030_resconfig *resconfig; - u8 address = twl4030_start_script_address; + u8 val, address = twl4030_start_script_address; err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1, @@ -548,6 +564,28 @@ void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts) } } + /* Board has to be wired properly to use this feature */ + if (twl4030_scripts->use_poweroff && !pm_power_off) { + /* Default for SEQ_OFFSYNC is set, lets ensure this */ + err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &val, + TWL4030_PM_MASTER_CFG_P123_TRANSITION); + if (err) { + pr_warning("TWL4030 Unable to read registers\n"); + + } else if (!(val & SEQ_OFFSYNC)) { + val |= SEQ_OFFSYNC; + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, val, + TWL4030_PM_MASTER_CFG_P123_TRANSITION); + if (err) { + pr_err("TWL4030 Unable to setup SEQ_OFFSYNC\n"); + goto relock; + } + } + + pm_power_off = twl4030_power_off; + } + +relock: err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, TWL4030_PM_MASTER_PROTECT_KEY); if (err) diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c index 268f80fd043..dda86293dc9 100644 --- a/drivers/mfd/twl6040-core.c +++ b/drivers/mfd/twl6040-core.c @@ -509,13 +509,10 @@ static int __devinit twl6040_probe(struct platform_device *pdev) twl6040->audpwron = -EINVAL; if (gpio_is_valid(twl6040->audpwron)) { - ret = gpio_request(twl6040->audpwron, "audpwron"); + ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW, + "audpwron"); if (ret) goto gpio1_err; - - ret = gpio_direction_output(twl6040->audpwron, 0); - if (ret) - goto gpio2_err; } /* codec interrupt */ @@ -619,18 +616,7 @@ static struct platform_driver twl6040_driver = { }, }; -static int __devinit twl6040_init(void) -{ - return platform_driver_register(&twl6040_driver); -} -module_init(twl6040_init); - -static void __devexit twl6040_exit(void) -{ - platform_driver_unregister(&twl6040_driver); -} - -module_exit(twl6040_exit); +module_platform_driver(twl6040_driver); MODULE_DESCRIPTION("TWL6040 MFD"); MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>"); diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index b281217334e..91c4f25e0e5 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -36,6 +36,15 @@ static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); static LIST_HEAD(ucb1x00_devices); +static struct mcp_device_id ucb1x00_id[] = { + { "ucb1x00", 0 }, /* auto-detection */ + { "ucb1200", UCB_ID_1200 }, + { "ucb1300", UCB_ID_1300 }, + { "tc35143", UCB_ID_TC35143 }, + { } +}; +MODULE_DEVICE_TABLE(mcp, ucb1x00_id); + /** * ucb1x00_io_set_dir - set IO direction * @ucb: UCB1x00 structure describing chip @@ -527,17 +536,33 @@ static struct class ucb1x00_class = { static int ucb1x00_probe(struct mcp *mcp) { + const struct mcp_device_id *mid; struct ucb1x00 *ucb; struct ucb1x00_driver *drv; + struct ucb1x00_plat_data *pdata; unsigned int id; int ret = -ENODEV; int temp; mcp_enable(mcp); id = mcp_reg_read(mcp, UCB_ID); + mid = mcp_get_device_id(mcp); - if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) { - printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id); + if (mid && mid->driver_data) { + if (id != mid->driver_data) { + printk(KERN_WARNING "%s wrong ID %04x found: %04x\n", + mid->name, (unsigned int) mid->driver_data, id); + goto err_disable; + } + } else { + mid = &ucb1x00_id[1]; + while (mid->driver_data) { + if (id == mid->driver_data) + break; + mid++; + } + printk(KERN_WARNING "%s ID not found: %04x\n", + ucb1x00_id[0].name, id); goto err_disable; } @@ -546,28 +571,28 @@ static int ucb1x00_probe(struct mcp *mcp) if (!ucb) goto err_disable; - + pdata = mcp->attached_device.platform_data; ucb->dev.class = &ucb1x00_class; ucb->dev.parent = &mcp->attached_device; - dev_set_name(&ucb->dev, "ucb1x00"); + dev_set_name(&ucb->dev, mid->name); spin_lock_init(&ucb->lock); spin_lock_init(&ucb->io_lock); sema_init(&ucb->adc_sem, 1); - ucb->id = id; + ucb->id = mid; ucb->mcp = mcp; ucb->irq = ucb1x00_detect_irq(ucb); if (ucb->irq == NO_IRQ) { - printk(KERN_ERR "UCB1x00: IRQ probe failed\n"); + printk(KERN_ERR "%s: IRQ probe failed\n", mid->name); ret = -ENODEV; goto err_free; } ucb->gpio.base = -1; - if (mcp->gpio_base != 0) { + if (pdata && (pdata->gpio_base >= 0)) { ucb->gpio.label = dev_name(&ucb->dev); - ucb->gpio.base = mcp->gpio_base; + ucb->gpio.base = pdata->gpio_base; ucb->gpio.ngpio = 10; ucb->gpio.set = ucb1x00_gpio_set; ucb->gpio.get = ucb1x00_gpio_get; @@ -580,10 +605,10 @@ static int ucb1x00_probe(struct mcp *mcp) dev_info(&ucb->dev, "gpio_base not set so no gpiolib support"); ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING, - "UCB1x00", ucb); + mid->name, ucb); if (ret) { - printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", - ucb->irq, ret); + printk(KERN_ERR "%s: unable to grab irq%d: %d\n", + mid->name, ucb->irq, ret); goto err_gpio; } @@ -705,6 +730,7 @@ static struct mcp_driver ucb1x00_driver = { .remove = ucb1x00_remove, .suspend = ucb1x00_suspend, .resume = ucb1x00_resume, + .id_table = ucb1x00_id, }; static int __init ucb1x00_init(void) diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 38ffbd50a0d..40ec3c11886 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev) ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC; idev->name = "Touchscreen panel"; - idev->id.product = ts->ucb->id; + idev->id.product = ts->ucb->id->driver_data; idev->open = ucb1x00_ts_open; idev->close = ucb1x00_ts_close; diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c index d698703dbd4..b73cc15e008 100644 --- a/drivers/mfd/vx855.c +++ b/drivers/mfd/vx855.c @@ -118,7 +118,7 @@ static void __devexit vx855_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static struct pci_device_id vx855_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(vx855_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VX855) }, { 0, } }; diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 0a2b8d41a70..f5e54fae8ad 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -559,6 +559,8 @@ static int wm831x_write(struct wm831x *wm831x, unsigned short reg, dev_vdbg(wm831x->dev, "Write %04x to R%d(0x%x)\n", buf[i], reg + i, reg + i); ret = regmap_write(wm831x->regmap, reg + i, buf[i]); + if (ret != 0) + return ret; } return 0; @@ -1875,7 +1877,6 @@ err_irq: err_regmap: mfd_remove_devices(wm831x->dev); regmap_exit(wm831x->regmap); - kfree(wm831x); return ret; } @@ -1887,7 +1888,6 @@ void wm831x_device_exit(struct wm831x *wm831x) free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x); wm831x_irq_exit(wm831x); regmap_exit(wm831x->regmap); - kfree(wm831x); } int wm831x_device_suspend(struct wm831x *wm831x) diff --git a/drivers/mfd/wm831x-i2c.c b/drivers/mfd/wm831x-i2c.c index ac8da1d439d..cb15609b0a4 100644 --- a/drivers/mfd/wm831x-i2c.c +++ b/drivers/mfd/wm831x-i2c.c @@ -30,7 +30,7 @@ static int wm831x_i2c_probe(struct i2c_client *i2c, struct wm831x *wm831x; int ret; - wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); + wm831x = devm_kzalloc(&i2c->dev, sizeof(struct wm831x), GFP_KERNEL); if (wm831x == NULL) return -ENOMEM; @@ -42,7 +42,6 @@ static int wm831x_i2c_probe(struct i2c_client *i2c, ret = PTR_ERR(wm831x->regmap); dev_err(wm831x->dev, "Failed to allocate register map: %d\n", ret); - kfree(wm831x); return ret; } diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c index f4747a4a9a9..bec4d053916 100644 --- a/drivers/mfd/wm831x-irq.c +++ b/drivers/mfd/wm831x-irq.c @@ -325,11 +325,6 @@ static inline int irq_data_to_status_reg(struct wm831x_irq_data *irq_data) return WM831X_INTERRUPT_STATUS_1 - 1 + irq_data->reg; } -static inline int irq_data_to_mask_reg(struct wm831x_irq_data *irq_data) -{ - return WM831X_INTERRUPT_STATUS_1_MASK - 1 + irq_data->reg; -} - static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x, int irq) { @@ -477,8 +472,7 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data) handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHPD); if (primary & WM831X_TCHDATA_INT) handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHDATA); - if (primary & (WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT)) - goto out; + primary &= ~(WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT); for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) { int offset = wm831x_irqs[i].reg - 1; diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c index 8d6a9a969db..62ef3254105 100644 --- a/drivers/mfd/wm831x-spi.c +++ b/drivers/mfd/wm831x-spi.c @@ -30,7 +30,7 @@ static int __devinit wm831x_spi_probe(struct spi_device *spi) type = (enum wm831x_parent)id->driver_data; - wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); + wm831x = devm_kzalloc(&spi->dev, sizeof(struct wm831x), GFP_KERNEL); if (wm831x == NULL) return -ENOMEM; @@ -45,7 +45,6 @@ static int __devinit wm831x_spi_probe(struct spi_device *spi) ret = PTR_ERR(wm831x->regmap); dev_err(wm831x->dev, "Failed to allocate register map: %d\n", ret); - kfree(wm831x); return ret; } @@ -95,7 +94,6 @@ MODULE_DEVICE_TABLE(spi, wm831x_spi_id); static struct spi_driver wm831x_spi_driver = { .driver = { .name = "wm831x", - .bus = &spi_bus_type, .owner = THIS_MODULE, .pm = &wm831x_spi_pm, }, diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index e81cc31e420..dd1caaac55e 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -573,6 +573,8 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, u16 id1, id2, mask_rev; u16 cust_id, mode, chip_rev; + dev_set_drvdata(wm8350->dev, wm8350); + /* get WM8350 revision and config mode */ ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1); if (ret != 0) { diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c index 5fe5de166ad..d955faaf27c 100644 --- a/drivers/mfd/wm8350-i2c.c +++ b/drivers/mfd/wm8350-i2c.c @@ -63,7 +63,7 @@ static int wm8350_i2c_probe(struct i2c_client *i2c, struct wm8350 *wm8350; int ret = 0; - wm8350 = kzalloc(sizeof(struct wm8350), GFP_KERNEL); + wm8350 = devm_kzalloc(&i2c->dev, sizeof(struct wm8350), GFP_KERNEL); if (wm8350 == NULL) return -ENOMEM; @@ -80,7 +80,6 @@ static int wm8350_i2c_probe(struct i2c_client *i2c, return ret; err: - kfree(wm8350); return ret; } @@ -89,7 +88,6 @@ static int wm8350_i2c_remove(struct i2c_client *i2c) struct wm8350 *wm8350 = i2c_get_clientdata(i2c); wm8350_device_exit(wm8350); - kfree(wm8350); return 0; } diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c index 62b4626f456..2204893444a 100644 --- a/drivers/mfd/wm8400-core.c +++ b/drivers/mfd/wm8400-core.c @@ -344,7 +344,7 @@ static int wm8400_i2c_probe(struct i2c_client *i2c, struct wm8400 *wm8400; int ret; - wm8400 = kzalloc(sizeof(struct wm8400), GFP_KERNEL); + wm8400 = devm_kzalloc(&i2c->dev, sizeof(struct wm8400), GFP_KERNEL); if (wm8400 == NULL) { ret = -ENOMEM; goto err; @@ -353,7 +353,7 @@ static int wm8400_i2c_probe(struct i2c_client *i2c, wm8400->regmap = regmap_init_i2c(i2c, &wm8400_regmap_config); if (IS_ERR(wm8400->regmap)) { ret = PTR_ERR(wm8400->regmap); - goto struct_err; + goto err; } wm8400->dev = &i2c->dev; @@ -367,8 +367,6 @@ static int wm8400_i2c_probe(struct i2c_client *i2c, map_err: regmap_exit(wm8400->regmap); -struct_err: - kfree(wm8400); err: return ret; } @@ -379,7 +377,6 @@ static int wm8400_i2c_remove(struct i2c_client *i2c) wm8400_release(wm8400); regmap_exit(wm8400->regmap); - kfree(wm8400); return 0; } diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 5664696f2d3..6a1a092db14 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -500,6 +500,14 @@ config USB_SWITCH_FSA9480 stereo and mono audio, video, microphone and UART data to use a common connector port. +config MAX8997_MUIC + tristate "MAX8997 MUIC Support" + depends on MFD_MAX8997 + help + If you say yes here you get support for the MUIC device of + Maxim MAX8997 PMIC. + The MAX8997 MUIC is a USB port accessory detector and switch. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index b26495a0255..3e1d80106f0 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -48,3 +48,4 @@ obj-y += lis3lv02d/ obj-y += carma/ obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ +obj-$(CONFIG_MAX8997_MUIC) += max8997-muic.o diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c index 2208a9d5262..d7a9aa14e5d 100644 --- a/drivers/misc/ab8500-pwm.c +++ b/drivers/misc/ab8500-pwm.c @@ -8,8 +8,8 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/pwm.h> -#include <linux/mfd/ab8500.h> #include <linux/mfd/abx500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/module.h> /* diff --git a/drivers/misc/max8997-muic.c b/drivers/misc/max8997-muic.c new file mode 100644 index 00000000000..d74ef41aabd --- /dev/null +++ b/drivers/misc/max8997-muic.c @@ -0,0 +1,505 @@ +/* + * max8997-muic.c - MAX8997 muic driver for the Maxim 8997 + * + * Copyright (C) 2011 Samsung Electrnoics + * Donggeun Kim <dg77.kim@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/err.h> +#include <linux/platform_device.h> +#include <linux/kobject.h> +#include <linux/mfd/max8997.h> +#include <linux/mfd/max8997-private.h> + +/* MAX8997-MUIC STATUS1 register */ +#define STATUS1_ADC_SHIFT 0 +#define STATUS1_ADCLOW_SHIFT 5 +#define STATUS1_ADCERR_SHIFT 6 +#define STATUS1_ADC_MASK (0x1f << STATUS1_ADC_SHIFT) +#define STATUS1_ADCLOW_MASK (0x1 << STATUS1_ADCLOW_SHIFT) +#define STATUS1_ADCERR_MASK (0x1 << STATUS1_ADCERR_SHIFT) + +/* MAX8997-MUIC STATUS2 register */ +#define STATUS2_CHGTYP_SHIFT 0 +#define STATUS2_CHGDETRUN_SHIFT 3 +#define STATUS2_DCDTMR_SHIFT 4 +#define STATUS2_DBCHG_SHIFT 5 +#define STATUS2_VBVOLT_SHIFT 6 +#define STATUS2_CHGTYP_MASK (0x7 << STATUS2_CHGTYP_SHIFT) +#define STATUS2_CHGDETRUN_MASK (0x1 << STATUS2_CHGDETRUN_SHIFT) +#define STATUS2_DCDTMR_MASK (0x1 << STATUS2_DCDTMR_SHIFT) +#define STATUS2_DBCHG_MASK (0x1 << STATUS2_DBCHG_SHIFT) +#define STATUS2_VBVOLT_MASK (0x1 << STATUS2_VBVOLT_SHIFT) + +/* MAX8997-MUIC STATUS3 register */ +#define STATUS3_OVP_SHIFT 2 +#define STATUS3_OVP_MASK (0x1 << STATUS3_OVP_SHIFT) + +/* MAX8997-MUIC CONTROL1 register */ +#define COMN1SW_SHIFT 0 +#define COMP2SW_SHIFT 3 +#define COMN1SW_MASK (0x7 << COMN1SW_SHIFT) +#define COMP2SW_MASK (0x7 << COMP2SW_SHIFT) +#define SW_MASK (COMP2SW_MASK | COMN1SW_MASK) + +#define MAX8997_SW_USB ((1 << COMP2SW_SHIFT) | (1 << COMN1SW_SHIFT)) +#define MAX8997_SW_AUDIO ((2 << COMP2SW_SHIFT) | (2 << COMN1SW_SHIFT)) +#define MAX8997_SW_UART ((3 << COMP2SW_SHIFT) | (3 << COMN1SW_SHIFT)) +#define MAX8997_SW_OPEN ((0 << COMP2SW_SHIFT) | (0 << COMN1SW_SHIFT)) + +#define MAX8997_ADC_GROUND 0x00 +#define MAX8997_ADC_MHL 0x01 +#define MAX8997_ADC_JIG_USB_1 0x18 +#define MAX8997_ADC_JIG_USB_2 0x19 +#define MAX8997_ADC_DESKDOCK 0x1a +#define MAX8997_ADC_JIG_UART 0x1c +#define MAX8997_ADC_CARDOCK 0x1d +#define MAX8997_ADC_OPEN 0x1f + +struct max8997_muic_irq { + unsigned int irq; + const char *name; +}; + +static struct max8997_muic_irq muic_irqs[] = { + { MAX8997_MUICIRQ_ADCError, "muic-ADC_error" }, + { MAX8997_MUICIRQ_ADCLow, "muic-ADC_low" }, + { MAX8997_MUICIRQ_ADC, "muic-ADC" }, + { MAX8997_MUICIRQ_VBVolt, "muic-VB_voltage" }, + { MAX8997_MUICIRQ_DBChg, "muic-DB_charger" }, + { MAX8997_MUICIRQ_DCDTmr, "muic-DCD_timer" }, + { MAX8997_MUICIRQ_ChgDetRun, "muic-CDR_status" }, + { MAX8997_MUICIRQ_ChgTyp, "muic-charger_type" }, + { MAX8997_MUICIRQ_OVP, "muic-over_voltage" }, +}; + +struct max8997_muic_info { + struct device *dev; + struct max8997_dev *iodev; + struct i2c_client *muic; + struct max8997_muic_platform_data *muic_pdata; + + int irq; + struct work_struct irq_work; + + enum max8997_muic_charger_type pre_charger_type; + int pre_adc; + + struct mutex mutex; +}; + +static int max8997_muic_handle_usb(struct max8997_muic_info *info, + enum max8997_muic_usb_type usb_type, bool attached) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + int ret = 0; + + if (usb_type == MAX8997_USB_HOST) { + /* switch to USB */ + ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1, + attached ? MAX8997_SW_USB : MAX8997_SW_OPEN, + SW_MASK); + if (ret) { + dev_err(info->dev, "failed to update muic register\n"); + goto out; + } + } + + if (mdata->usb_callback) + mdata->usb_callback(usb_type, attached); +out: + return ret; +} + +static void max8997_muic_handle_mhl(struct max8997_muic_info *info, + bool attached) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + + if (mdata->mhl_callback) + mdata->mhl_callback(attached); +} + +static int max8997_muic_handle_dock(struct max8997_muic_info *info, + int adc, bool attached) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + int ret = 0; + + /* switch to AUDIO */ + ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1, + attached ? MAX8997_SW_AUDIO : MAX8997_SW_OPEN, + SW_MASK); + if (ret) { + dev_err(info->dev, "failed to update muic register\n"); + goto out; + } + + switch (adc) { + case MAX8997_ADC_DESKDOCK: + if (mdata->deskdock_callback) + mdata->deskdock_callback(attached); + break; + case MAX8997_ADC_CARDOCK: + if (mdata->cardock_callback) + mdata->cardock_callback(attached); + break; + default: + break; + } +out: + return ret; +} + +static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info, + bool attached) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + int ret = 0; + + /* switch to UART */ + ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1, + attached ? MAX8997_SW_UART : MAX8997_SW_OPEN, + SW_MASK); + if (ret) { + dev_err(info->dev, "failed to update muic register\n"); + goto out; + } + + if (mdata->uart_callback) + mdata->uart_callback(attached); +out: + return ret; +} + +static int max8997_muic_handle_adc_detach(struct max8997_muic_info *info) +{ + int ret = 0; + + switch (info->pre_adc) { + case MAX8997_ADC_GROUND: + ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, false); + break; + case MAX8997_ADC_MHL: + max8997_muic_handle_mhl(info, false); + break; + case MAX8997_ADC_JIG_USB_1: + case MAX8997_ADC_JIG_USB_2: + ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, false); + break; + case MAX8997_ADC_DESKDOCK: + case MAX8997_ADC_CARDOCK: + ret = max8997_muic_handle_dock(info, info->pre_adc, false); + break; + case MAX8997_ADC_JIG_UART: + ret = max8997_muic_handle_jig_uart(info, false); + break; + default: + break; + } + + return ret; +} + +static int max8997_muic_handle_adc(struct max8997_muic_info *info, int adc) +{ + int ret = 0; + + switch (adc) { + case MAX8997_ADC_GROUND: + ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, true); + break; + case MAX8997_ADC_MHL: + max8997_muic_handle_mhl(info, true); + break; + case MAX8997_ADC_JIG_USB_1: + case MAX8997_ADC_JIG_USB_2: + ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, true); + break; + case MAX8997_ADC_DESKDOCK: + case MAX8997_ADC_CARDOCK: + ret = max8997_muic_handle_dock(info, adc, true); + break; + case MAX8997_ADC_JIG_UART: + ret = max8997_muic_handle_jig_uart(info, true); + break; + case MAX8997_ADC_OPEN: + ret = max8997_muic_handle_adc_detach(info); + break; + default: + break; + } + + info->pre_adc = adc; + + return ret; +} + +static int max8997_muic_handle_charger_type(struct max8997_muic_info *info, + enum max8997_muic_charger_type charger_type) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + u8 adc; + int ret; + + ret = max8997_read_reg(info->muic, MAX8997_MUIC_REG_STATUS1, &adc); + if (ret) { + dev_err(info->dev, "failed to read muic register\n"); + goto out; + } + + switch (charger_type) { + case MAX8997_CHARGER_TYPE_NONE: + if (mdata->charger_callback) + mdata->charger_callback(false, charger_type); + if (info->pre_charger_type == MAX8997_CHARGER_TYPE_USB) { + max8997_muic_handle_usb(info, + MAX8997_USB_DEVICE, false); + } + break; + case MAX8997_CHARGER_TYPE_USB: + if ((adc & STATUS1_ADC_MASK) == MAX8997_ADC_OPEN) { + max8997_muic_handle_usb(info, + MAX8997_USB_DEVICE, true); + } + if (mdata->charger_callback) + mdata->charger_callback(true, charger_type); + break; + case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT: + case MAX8997_CHARGER_TYPE_DEDICATED_CHG: + case MAX8997_CHARGER_TYPE_500MA: + case MAX8997_CHARGER_TYPE_1A: + if (mdata->charger_callback) + mdata->charger_callback(true, charger_type); + break; + default: + break; + } + + info->pre_charger_type = charger_type; +out: + return ret; +} + +static void max8997_muic_irq_work(struct work_struct *work) +{ + struct max8997_muic_info *info = container_of(work, + struct max8997_muic_info, irq_work); + struct max8997_platform_data *pdata = + dev_get_platdata(info->iodev->dev); + u8 status[3]; + u8 adc, chg_type; + + int irq_type = info->irq - pdata->irq_base; + int ret; + + mutex_lock(&info->mutex); + + ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1, + 3, status); + if (ret) { + dev_err(info->dev, "failed to read muic register\n"); + mutex_unlock(&info->mutex); + return; + } + + dev_dbg(info->dev, "%s: STATUS1:0x%x, 2:0x%x\n", __func__, + status[0], status[1]); + + switch (irq_type) { + case MAX8997_MUICIRQ_ADC: + adc = status[0] & STATUS1_ADC_MASK; + adc >>= STATUS1_ADC_SHIFT; + + max8997_muic_handle_adc(info, adc); + break; + case MAX8997_MUICIRQ_ChgTyp: + chg_type = status[1] & STATUS2_CHGTYP_MASK; + chg_type >>= STATUS2_CHGTYP_SHIFT; + + max8997_muic_handle_charger_type(info, chg_type); + break; + default: + dev_info(info->dev, "misc interrupt: %s occurred\n", + muic_irqs[irq_type].name); + break; + } + + mutex_unlock(&info->mutex); + + return; +} + +static irqreturn_t max8997_muic_irq_handler(int irq, void *data) +{ + struct max8997_muic_info *info = data; + + dev_dbg(info->dev, "irq:%d\n", irq); + info->irq = irq; + + schedule_work(&info->irq_work); + + return IRQ_HANDLED; +} + +static void max8997_muic_detect_dev(struct max8997_muic_info *info) +{ + int ret; + u8 status[2], adc, chg_type; + + ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1, + 2, status); + if (ret) { + dev_err(info->dev, "failed to read muic register\n"); + return; + } + + dev_info(info->dev, "STATUS1:0x%x, STATUS2:0x%x\n", + status[0], status[1]); + + adc = status[0] & STATUS1_ADC_MASK; + adc >>= STATUS1_ADC_SHIFT; + + chg_type = status[1] & STATUS2_CHGTYP_MASK; + chg_type >>= STATUS2_CHGTYP_SHIFT; + + max8997_muic_handle_adc(info, adc); + max8997_muic_handle_charger_type(info, chg_type); +} + +static void max8997_initialize_device(struct max8997_muic_info *info) +{ + struct max8997_muic_platform_data *mdata = info->muic_pdata; + int i; + + for (i = 0; i < mdata->num_init_data; i++) { + max8997_write_reg(info->muic, mdata->init_data[i].addr, + mdata->init_data[i].data); + } +} + +static int __devinit max8997_muic_probe(struct platform_device *pdev) +{ + struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent); + struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev); + struct max8997_muic_info *info; + int ret, i; + + info = kzalloc(sizeof(struct max8997_muic_info), GFP_KERNEL); + if (!info) { + dev_err(&pdev->dev, "failed to allocate memory\n"); + ret = -ENOMEM; + goto err_kfree; + } + + if (!pdata->muic_pdata) { + dev_err(&pdev->dev, "failed to get platform_data\n"); + ret = -EINVAL; + goto err_pdata; + } + info->muic_pdata = pdata->muic_pdata; + + info->dev = &pdev->dev; + info->iodev = iodev; + info->muic = iodev->muic; + + platform_set_drvdata(pdev, info); + mutex_init(&info->mutex); + + INIT_WORK(&info->irq_work, max8997_muic_irq_work); + + for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) { + struct max8997_muic_irq *muic_irq = &muic_irqs[i]; + + ret = request_threaded_irq(pdata->irq_base + muic_irq->irq, + NULL, max8997_muic_irq_handler, + 0, muic_irq->name, + info); + if (ret) { + dev_err(&pdev->dev, + "failed: irq request (IRQ: %d," + " error :%d)\n", + muic_irq->irq, ret); + + for (i = i - 1; i >= 0; i--) + free_irq(muic_irq->irq, info); + + goto err_irq; + } + } + + /* Initialize registers according to platform data */ + max8997_initialize_device(info); + + /* Initial device detection */ + max8997_muic_detect_dev(info); + + return ret; + +err_irq: +err_pdata: + kfree(info); +err_kfree: + return ret; +} + +static int __devexit max8997_muic_remove(struct platform_device *pdev) +{ + struct max8997_muic_info *info = platform_get_drvdata(pdev); + struct max8997_platform_data *pdata = + dev_get_platdata(info->iodev->dev); + int i; + + for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) + free_irq(pdata->irq_base + muic_irqs[i].irq, info); + cancel_work_sync(&info->irq_work); + + kfree(info); + + return 0; +} + +static struct platform_driver max8997_muic_driver = { + .driver = { + .name = "max8997-muic", + .owner = THIS_MODULE, + }, + .probe = max8997_muic_probe, + .remove = __devexit_p(max8997_muic_remove), +}; + +static int __init max8997_muic_init(void) +{ + return platform_driver_register(&max8997_muic_driver); +} +module_init(max8997_muic_init); + +static void __exit max8997_muic_exit(void) +{ + platform_driver_unregister(&max8997_muic_driver); +} +module_exit(max8997_muic_exit); + +MODULE_DESCRIPTION("Maxim MAX8997 MUIC driver"); +MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile index 12eef393e21..400756ec7c4 100644 --- a/drivers/mmc/Makefile +++ b/drivers/mmc/Makefile @@ -6,5 +6,4 @@ subdir-ccflags-$(CONFIG_MMC_DEBUG) := -DDEBUG obj-$(CONFIG_MMC) += core/ obj-$(CONFIG_MMC) += card/ -obj-$(CONFIG_MMC) += host/ - +obj-$(subst m,y,$(CONFIG_MMC)) += host/ diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1e0e27cbe98..0cad48a284a 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -107,6 +107,8 @@ struct mmc_blk_data { */ unsigned int part_curr; struct device_attribute force_ro; + struct device_attribute power_ro_lock; + int area_type; }; static DEFINE_MUTEX(open_lock); @@ -119,6 +121,7 @@ enum mmc_blk_status { MMC_BLK_ABORT, MMC_BLK_DATA_ERR, MMC_BLK_ECC_ERR, + MMC_BLK_NOMEDIUM, }; module_param(perdev_minors, int, 0444); @@ -165,6 +168,70 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_unlock(&open_lock); } +static ssize_t power_ro_lock_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + struct mmc_card *card = md->queue.card; + int locked = 0; + + if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN) + locked = 2; + else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN) + locked = 1; + + ret = snprintf(buf, PAGE_SIZE, "%d\n", locked); + + return ret; +} + +static ssize_t power_ro_lock_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + struct mmc_blk_data *md, *part_md; + struct mmc_card *card; + unsigned long set; + + if (kstrtoul(buf, 0, &set)) + return -EINVAL; + + if (set != 1) + return count; + + md = mmc_blk_get(dev_to_disk(dev)); + card = md->queue.card; + + mmc_claim_host(card->host); + + ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP, + card->ext_csd.boot_ro_lock | + EXT_CSD_BOOT_WP_B_PWR_WP_EN, + card->ext_csd.part_time); + if (ret) + pr_err("%s: Locking boot partition ro until next power on failed: %d\n", md->disk->disk_name, ret); + else + card->ext_csd.boot_ro_lock |= EXT_CSD_BOOT_WP_B_PWR_WP_EN; + + mmc_release_host(card->host); + + if (!ret) { + pr_info("%s: Locking boot partition ro until next power on\n", + md->disk->disk_name); + set_disk_ro(md->disk, 1); + + list_for_each_entry(part_md, &md->part, part) + if (part_md->area_type == MMC_BLK_DATA_AREA_BOOT) { + pr_info("%s: Locking boot partition ro until next power on\n", part_md->disk->disk_name); + set_disk_ro(part_md->disk, 1); + } + } + + mmc_blk_put(md); + return count; +} + static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -266,6 +333,9 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( goto idata_err; } + if (!idata->buf_bytes) + return idata; + idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL); if (!idata->buf) { err = -ENOMEM; @@ -312,25 +382,6 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev, if (IS_ERR(idata)) return PTR_ERR(idata); - cmd.opcode = idata->ic.opcode; - cmd.arg = idata->ic.arg; - cmd.flags = idata->ic.flags; - - data.sg = &sg; - data.sg_len = 1; - data.blksz = idata->ic.blksz; - data.blocks = idata->ic.blocks; - - sg_init_one(data.sg, idata->buf, idata->buf_bytes); - - if (idata->ic.write_flag) - data.flags = MMC_DATA_WRITE; - else - data.flags = MMC_DATA_READ; - - mrq.cmd = &cmd; - mrq.data = &data; - md = mmc_blk_get(bdev->bd_disk); if (!md) { err = -EINVAL; @@ -343,6 +394,48 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev, goto cmd_done; } + cmd.opcode = idata->ic.opcode; + cmd.arg = idata->ic.arg; + cmd.flags = idata->ic.flags; + + if (idata->buf_bytes) { + data.sg = &sg; + data.sg_len = 1; + data.blksz = idata->ic.blksz; + data.blocks = idata->ic.blocks; + + sg_init_one(data.sg, idata->buf, idata->buf_bytes); + + if (idata->ic.write_flag) + data.flags = MMC_DATA_WRITE; + else + data.flags = MMC_DATA_READ; + + /* data.flags must already be set before doing this. */ + mmc_set_data_timeout(&data, card); + + /* Allow overriding the timeout_ns for empirical tuning. */ + if (idata->ic.data_timeout_ns) + data.timeout_ns = idata->ic.data_timeout_ns; + + if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { + /* + * Pretend this is a data transfer and rely on the + * host driver to compute timeout. When all host + * drivers support cmd.cmd_timeout for R1B, this + * can be changed to: + * + * mrq.data = NULL; + * cmd.cmd_timeout = idata->ic.cmd_timeout_ms; + */ + data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000; + } + + mrq.data = &data; + } + + mrq.cmd = &cmd; + mmc_claim_host(card->host); if (idata->ic.is_acmd) { @@ -351,24 +444,6 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev, goto cmd_rel_host; } - /* data.flags must already be set before doing this. */ - mmc_set_data_timeout(&data, card); - /* Allow overriding the timeout_ns for empirical tuning. */ - if (idata->ic.data_timeout_ns) - data.timeout_ns = idata->ic.data_timeout_ns; - - if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { - /* - * Pretend this is a data transfer and rely on the host driver - * to compute timeout. When all host drivers support - * cmd.cmd_timeout for R1B, this can be changed to: - * - * mrq.data = NULL; - * cmd.cmd_timeout = idata->ic.cmd_timeout_ms; - */ - data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000; - } - mmc_wait_for_req(card->host, &mrq); if (cmd.error) { @@ -565,6 +640,7 @@ static int get_card_status(struct mmc_card *card, u32 *status, int retries) return err; } +#define ERR_NOMEDIUM 3 #define ERR_RETRY 2 #define ERR_ABORT 1 #define ERR_CONTINUE 0 @@ -632,6 +708,9 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, u32 status, stop_status = 0; int err, retry; + if (mmc_card_removed(card)) + return ERR_NOMEDIUM; + /* * Try to get card status which indicates both the card state * and why there was no response. If the first attempt fails, @@ -648,8 +727,12 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, } /* We couldn't get a response from the card. Give up. */ - if (err) + if (err) { + /* Check if the card is removed */ + if (mmc_detect_card_removed(card->host)) + return ERR_NOMEDIUM; return ERR_ABORT; + } /* Flag ECC errors */ if ((status & R1_CARD_ECC_FAILED) || @@ -922,6 +1005,8 @@ static int mmc_blk_err_check(struct mmc_card *card, return MMC_BLK_RETRY; case ERR_ABORT: return MMC_BLK_ABORT; + case ERR_NOMEDIUM: + return MMC_BLK_NOMEDIUM; case ERR_CONTINUE: break; } @@ -1255,6 +1340,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) if (!ret) goto start_new_req; break; + case MMC_BLK_NOMEDIUM: + goto cmd_abort; } if (ret) { @@ -1271,6 +1358,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) cmd_abort: spin_lock_irq(&md->lock); + if (mmc_card_removed(card)) + req->cmd_flags |= REQ_QUIET; while (ret) ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); spin_unlock_irq(&md->lock); @@ -1339,7 +1428,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct device *parent, sector_t size, bool default_ro, - const char *subname) + const char *subname, + int area_type) { struct mmc_blk_data *md; int devidx, ret; @@ -1364,11 +1454,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, if (!subname) { md->name_idx = find_first_zero_bit(name_use, max_devices); __set_bit(md->name_idx, name_use); - } - else + } else md->name_idx = ((struct mmc_blk_data *) dev_to_disk(parent)->private_data)->name_idx; + md->area_type = area_type; + /* * Set the read-only status based on the supported commands * and the write protect switch. @@ -1462,7 +1553,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) size = card->csd.capacity << (card->csd.read_blkbits - 9); } - md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL); + md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL, + MMC_BLK_DATA_AREA_MAIN); return md; } @@ -1471,13 +1563,14 @@ static int mmc_blk_alloc_part(struct mmc_card *card, unsigned int part_type, sector_t size, bool default_ro, - const char *subname) + const char *subname, + int area_type) { char cap_str[10]; struct mmc_blk_data *part_md; part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro, - subname); + subname, area_type); if (IS_ERR(part_md)) return PTR_ERR(part_md); part_md->part_type = part_type; @@ -1510,7 +1603,8 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) card->part[idx].part_cfg, card->part[idx].size >> 9, card->part[idx].force_ro, - card->part[idx].name); + card->part[idx].name, + card->part[idx].area_type); if (ret) return ret; } @@ -1539,9 +1633,16 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card) static void mmc_blk_remove_req(struct mmc_blk_data *md) { + struct mmc_card *card; + if (md) { + card = md->queue.card; if (md->disk->flags & GENHD_FL_UP) { device_remove_file(disk_to_dev(md->disk), &md->force_ro); + if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && + card->ext_csd.boot_ro_lockable) + device_remove_file(disk_to_dev(md->disk), + &md->power_ro_lock); /* Stop new requests from getting into the queue */ del_gendisk(md->disk); @@ -1570,6 +1671,7 @@ static void mmc_blk_remove_parts(struct mmc_card *card, static int mmc_add_disk(struct mmc_blk_data *md) { int ret; + struct mmc_card *card = md->queue.card; add_disk(md->disk); md->force_ro.show = force_ro_show; @@ -1579,18 +1681,53 @@ static int mmc_add_disk(struct mmc_blk_data *md) md->force_ro.attr.mode = S_IRUGO | S_IWUSR; ret = device_create_file(disk_to_dev(md->disk), &md->force_ro); if (ret) - del_gendisk(md->disk); + goto force_ro_fail; + + if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && + card->ext_csd.boot_ro_lockable) { + mode_t mode; + + if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS) + mode = S_IRUGO; + else + mode = S_IRUGO | S_IWUSR; + + md->power_ro_lock.show = power_ro_lock_show; + md->power_ro_lock.store = power_ro_lock_store; + md->power_ro_lock.attr.mode = mode; + md->power_ro_lock.attr.name = + "ro_lock_until_next_power_on"; + ret = device_create_file(disk_to_dev(md->disk), + &md->power_ro_lock); + if (ret) + goto power_ro_lock_fail; + } + return ret; + +power_ro_lock_fail: + device_remove_file(disk_to_dev(md->disk), &md->force_ro); +force_ro_fail: + del_gendisk(md->disk); return ret; } +#define CID_MANFID_SANDISK 0x2 +#define CID_MANFID_TOSHIBA 0x11 +#define CID_MANFID_MICRON 0x13 + static const struct mmc_fixup blk_fixups[] = { - MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), - MMC_FIXUP("SEM04G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), - MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), - MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), - MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM02G", CID_MANFID_SANDISK, 0x100, add_quirk, + MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM04G", CID_MANFID_SANDISK, 0x100, add_quirk, + MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM08G", CID_MANFID_SANDISK, 0x100, add_quirk, + MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM16G", CID_MANFID_SANDISK, 0x100, add_quirk, + MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM32G", CID_MANFID_SANDISK, 0x100, add_quirk, + MMC_QUIRK_INAND_CMD38), /* * Some MMC cards experience performance degradation with CMD23 @@ -1600,18 +1737,18 @@ static const struct mmc_fixup blk_fixups[] = * * N.B. This doesn't affect SD cards. */ - MMC_FIXUP("MMC08G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_FIXUP("MMC08G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_BLK_NO_CMD23), - MMC_FIXUP("MMC16G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_FIXUP("MMC16G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_BLK_NO_CMD23), - MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_BLK_NO_CMD23), /* * Some Micron MMC cards needs longer data read timeout than * indicated in CSD. */ - MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc, + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc, MMC_QUIRK_LONG_READ_TIME), END_FIXUP diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index e99bdc18002..759714ed6be 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -1581,6 +1581,7 @@ static int mmc_test_area_init(struct mmc_test_card *test, int erase, int fill) t->max_segs = test->card->host->max_segs; t->max_seg_sz = test->card->host->max_seg_size; + t->max_seg_sz -= t->max_seg_sz % 512; t->max_tfr = t->max_sz; if (t->max_tfr >> 9 > test->card->host->max_blk_count) diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index dcad59cbfef..2517547b436 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -29,6 +29,8 @@ */ static int mmc_prep_request(struct request_queue *q, struct request *req) { + struct mmc_queue *mq = q->queuedata; + /* * We only like normal block requests and discards. */ @@ -37,6 +39,9 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) return BLKPREP_KILL; } + if (mq && mmc_card_removed(mq->card)) + return BLKPREP_KILL; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile index 639501970b4..dca4428380f 100644 --- a/drivers/mmc/core/Makefile +++ b/drivers/mmc/core/Makefile @@ -7,6 +7,6 @@ mmc_core-y := core.o bus.o host.o \ mmc.o mmc_ops.o sd.o sd_ops.o \ sdio.o sdio_ops.o sdio_bus.o \ sdio_cis.o sdio_io.o sdio_irq.o \ - quirks.o + quirks.o cd-gpio.o mmc_core-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 6be49249895..5d011a39dff 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -303,10 +303,11 @@ int mmc_add_card(struct mmc_card *card) mmc_card_ddr_mode(card) ? "DDR " : "", type); } else { - printk(KERN_INFO "%s: new %s%s%s card at address %04x\n", + pr_info("%s: new %s%s%s%s card at address %04x\n", mmc_hostname(card->host), - mmc_sd_card_uhs(card) ? "ultra high speed " : + mmc_card_uhs(card) ? "ultra high speed " : (mmc_card_highspeed(card) ? "high speed " : ""), + (mmc_card_hs200(card) ? "HS200 " : ""), mmc_card_ddr_mode(card) ? "DDR " : "", type, card->rca); } diff --git a/drivers/mmc/core/cd-gpio.c b/drivers/mmc/core/cd-gpio.c new file mode 100644 index 00000000000..082202ae4a0 --- /dev/null +++ b/drivers/mmc/core/cd-gpio.c @@ -0,0 +1,74 @@ +/* + * Generic GPIO card-detect helper + * + * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/gpio.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/mmc/host.h> +#include <linux/module.h> +#include <linux/slab.h> + +struct mmc_cd_gpio { + unsigned int gpio; + char label[0]; +}; + +static irqreturn_t mmc_cd_gpio_irqt(int irq, void *dev_id) +{ + /* Schedule a card detection after a debounce timeout */ + mmc_detect_change(dev_id, msecs_to_jiffies(100)); + return IRQ_HANDLED; +} + +int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio, + unsigned int irq, unsigned long flags) +{ + size_t len = strlen(dev_name(host->parent)) + 4; + struct mmc_cd_gpio *cd = kmalloc(sizeof(*cd) + len, GFP_KERNEL); + int ret; + + if (!cd) + return -ENOMEM; + + snprintf(cd->label, len, "%s cd", dev_name(host->parent)); + + ret = gpio_request_one(gpio, GPIOF_DIR_IN, cd->label); + if (ret < 0) + goto egpioreq; + + ret = request_threaded_irq(irq, NULL, mmc_cd_gpio_irqt, + flags, cd->label, host); + if (ret < 0) + goto eirqreq; + + cd->gpio = gpio; + host->hotplug.irq = irq; + host->hotplug.handler_priv = cd; + + return 0; + +eirqreq: + gpio_free(gpio); +egpioreq: + kfree(cd); + return ret; +} +EXPORT_SYMBOL(mmc_cd_gpio_request); + +void mmc_cd_gpio_free(struct mmc_host *host) +{ + struct mmc_cd_gpio *cd = host->hotplug.handler_priv; + + free_irq(host->hotplug.irq, host); + gpio_free(cd->gpio); + kfree(cd); +} +EXPORT_SYMBOL(mmc_cd_gpio_free); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 75d7d7e1736..f545a3e6eb8 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -140,7 +140,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) cmd->retries = 0; } - if (err && cmd->retries) { + if (err && cmd->retries && !mmc_card_removed(host->card)) { /* * Request starter must handle retries - see * mmc_wait_for_req_done(). @@ -247,6 +247,11 @@ static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq) { init_completion(&mrq->completion); mrq->done = mmc_wait_done; + if (mmc_card_removed(host->card)) { + mrq->cmd->error = -ENOMEDIUM; + complete(&mrq->completion); + return; + } mmc_start_request(host, mrq); } @@ -259,7 +264,8 @@ static void mmc_wait_for_req_done(struct mmc_host *host, wait_for_completion(&mrq->completion); cmd = mrq->cmd; - if (!cmd->error || !cmd->retries) + if (!cmd->error || !cmd->retries || + mmc_card_removed(host->card)) break; pr_debug("%s: req failed (CMD%u): %d, retrying...\n", @@ -1456,7 +1462,7 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay) WARN_ON(host->removed); spin_unlock_irqrestore(&host->lock, flags); #endif - + host->detect_change = 1; mmc_schedule_delayed_work(&host->detect, delay); } @@ -2049,6 +2055,43 @@ static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq) return -EIO; } +int _mmc_detect_card_removed(struct mmc_host *host) +{ + int ret; + + if ((host->caps & MMC_CAP_NONREMOVABLE) || !host->bus_ops->alive) + return 0; + + if (!host->card || mmc_card_removed(host->card)) + return 1; + + ret = host->bus_ops->alive(host); + if (ret) { + mmc_card_set_removed(host->card); + pr_debug("%s: card remove detected\n", mmc_hostname(host)); + } + + return ret; +} + +int mmc_detect_card_removed(struct mmc_host *host) +{ + struct mmc_card *card = host->card; + + WARN_ON(!host->claimed); + /* + * The card will be considered unchanged unless we have been asked to + * detect a change or host requires polling to provide card detection. + */ + if (card && !host->detect_change && !(host->caps & MMC_CAP_NEEDS_POLL)) + return mmc_card_removed(card); + + host->detect_change = 0; + + return _mmc_detect_card_removed(host); +} +EXPORT_SYMBOL(mmc_detect_card_removed); + void mmc_rescan(struct work_struct *work) { static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; @@ -2069,6 +2112,8 @@ void mmc_rescan(struct work_struct *work) && !(host->caps & MMC_CAP_NONREMOVABLE)) host->bus_ops->detect(host); + host->detect_change = 0; + /* * Let mmc_bus_put() free the bus/bus_ops if we've found that * the card is no longer present. @@ -2130,6 +2175,7 @@ void mmc_stop_host(struct mmc_host *host) mmc_bus_get(host); if (host->bus_ops && !host->bus_dead) { + /* Calling bus_ops->remove() with a claimed host can deadlock */ if (host->bus_ops->remove) host->bus_ops->remove(host); @@ -2201,6 +2247,9 @@ int mmc_card_awake(struct mmc_host *host) { int err = -ENOSYS; + if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD) + return 0; + mmc_bus_get(host); if (host->bus_ops && !host->bus_dead && host->bus_ops->awake) @@ -2216,6 +2265,9 @@ int mmc_card_sleep(struct mmc_host *host) { int err = -ENOSYS; + if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD) + return 0; + mmc_bus_get(host); if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep) @@ -2270,6 +2322,7 @@ EXPORT_SYMBOL(mmc_flush_cache); int mmc_cache_ctrl(struct mmc_host *host, u8 enable) { struct mmc_card *card = host->card; + unsigned int timeout; int err = 0; if (!(host->caps2 & MMC_CAP2_CACHE_CTRL) || @@ -2280,16 +2333,18 @@ int mmc_cache_ctrl(struct mmc_host *host, u8 enable) (card->ext_csd.cache_size > 0)) { enable = !!enable; - if (card->ext_csd.cache_ctrl ^ enable) + if (card->ext_csd.cache_ctrl ^ enable) { + timeout = enable ? card->ext_csd.generic_cmd6_time : 0; err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_CACHE_CTRL, enable, 0); - if (err) - pr_err("%s: cache %s error %d\n", - mmc_hostname(card->host), - enable ? "on" : "off", - err); - else - card->ext_csd.cache_ctrl = enable; + EXT_CSD_CACHE_CTRL, enable, timeout); + if (err) + pr_err("%s: cache %s error %d\n", + mmc_hostname(card->host), + enable ? "on" : "off", + err); + else + card->ext_csd.cache_ctrl = enable; + } } return err; @@ -2310,7 +2365,13 @@ int mmc_suspend_host(struct mmc_host *host) cancel_delayed_work(&host->disable); cancel_delayed_work(&host->detect); mmc_flush_scheduled_work(); - err = mmc_cache_ctrl(host, 0); + if (mmc_try_claim_host(host)) { + err = mmc_cache_ctrl(host, 0); + mmc_do_release_host(host); + } else { + err = -EBUSY; + } + if (err) goto out; @@ -2338,7 +2399,9 @@ int mmc_suspend_host(struct mmc_host *host) if (err == -ENOSYS || !host->bus_ops->resume) { /* * We simply "remove" the card in this case. - * It will be redetected on resume. + * It will be redetected on resume. (Calling + * bus_ops->remove() with a claimed host can + * deadlock.) */ if (host->bus_ops->remove) host->bus_ops->remove(host); @@ -2431,11 +2494,11 @@ int mmc_pm_notify(struct notifier_block *notify_block, if (!host->bus_ops || host->bus_ops->suspend) break; - mmc_claim_host(host); - + /* Calling bus_ops->remove() with a claimed host can deadlock */ if (host->bus_ops->remove) host->bus_ops->remove(host); + mmc_claim_host(host); mmc_detach_bus(host); mmc_power_off(host); mmc_release_host(host); diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index afa6bd2b7b7..3bdafbca354 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -24,6 +24,7 @@ struct mmc_bus_ops { int (*resume)(struct mmc_host *); int (*power_save)(struct mmc_host *); int (*power_restore)(struct mmc_host *); + int (*alive)(struct mmc_host *); }; void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops); @@ -59,6 +60,8 @@ void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); +int _mmc_detect_card_removed(struct mmc_host *host); + int mmc_attach_mmc(struct mmc_host *host); int mmc_attach_sd(struct mmc_host *host); int mmc_attach_sdio(struct mmc_host *host); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 3923880118b..9ab5b17d488 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -57,6 +57,8 @@ static int mmc_ios_show(struct seq_file *s, void *data) const char *str; seq_printf(s, "clock:\t\t%u Hz\n", ios->clock); + if (host->actual_clock) + seq_printf(s, "actual clock:\t%u Hz\n", host->actual_clock); seq_printf(s, "vdd:\t\t%u ", ios->vdd); if ((1 << ios->vdd) & MMC_VDD_165_195) seq_printf(s, "(1.65 - 1.95 V)\n"); @@ -133,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data) case MMC_TIMING_UHS_DDR50: str = "sd uhs DDR50"; break; + case MMC_TIMING_MMC_HS200: + str = "mmc high-speed SDR200"; + break; default: str = "invalid"; break; diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index d31c78b72b0..30055f2b0d4 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -54,6 +54,27 @@ static DEFINE_IDR(mmc_host_idr); static DEFINE_SPINLOCK(mmc_host_lock); #ifdef CONFIG_MMC_CLKGATE +static ssize_t clkgate_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + return snprintf(buf, PAGE_SIZE, "%lu\n", host->clkgate_delay); +} + +static ssize_t clkgate_delay_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + unsigned long flags, value; + + if (kstrtoul(buf, 0, &value)) + return -EINVAL; + + spin_lock_irqsave(&host->clk_lock, flags); + host->clkgate_delay = value; + spin_unlock_irqrestore(&host->clk_lock, flags); + return count; +} /* * Enabling clock gating will make the core call out to the host @@ -114,7 +135,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) static void mmc_host_clk_gate_work(struct work_struct *work) { struct mmc_host *host = container_of(work, struct mmc_host, - clk_gate_work); + clk_gate_work.work); mmc_host_clk_gate_delayed(host); } @@ -131,6 +152,8 @@ void mmc_host_clk_hold(struct mmc_host *host) { unsigned long flags; + /* cancel any clock gating work scheduled by mmc_host_clk_release() */ + cancel_delayed_work_sync(&host->clk_gate_work); mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { @@ -180,7 +203,8 @@ void mmc_host_clk_release(struct mmc_host *host) host->clk_requests--; if (mmc_host_may_gate_card(host->card) && !host->clk_requests) - queue_work(system_nrt_wq, &host->clk_gate_work); + queue_delayed_work(system_nrt_wq, &host->clk_gate_work, + msecs_to_jiffies(host->clkgate_delay)); spin_unlock_irqrestore(&host->clk_lock, flags); } @@ -213,8 +237,13 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_requests = 0; /* Hold MCI clock for 8 cycles by default */ host->clk_delay = 8; + /* + * Default clock gating delay is 200ms. + * This value can be tuned by writing into sysfs entry. + */ + host->clkgate_delay = 200; host->clk_gated = false; - INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); + INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); mutex_init(&host->clk_gate_mutex); } @@ -229,7 +258,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host) * Wait for any outstanding gate and then make sure we're * ungated before exiting. */ - if (cancel_work_sync(&host->clk_gate_work)) + if (cancel_delayed_work_sync(&host->clk_gate_work)) mmc_host_clk_gate_delayed(host); if (host->clk_gated) mmc_host_clk_hold(host); @@ -237,6 +266,17 @@ static inline void mmc_host_clk_exit(struct mmc_host *host) WARN_ON(host->clk_requests > 1); } +static inline void mmc_host_clk_sysfs_init(struct mmc_host *host) +{ + host->clkgate_delay_attr.show = clkgate_delay_show; + host->clkgate_delay_attr.store = clkgate_delay_store; + sysfs_attr_init(&host->clkgate_delay_attr.attr); + host->clkgate_delay_attr.attr.name = "clkgate_delay"; + host->clkgate_delay_attr.attr.mode = S_IRUGO | S_IWUSR; + if (device_create_file(&host->class_dev, &host->clkgate_delay_attr)) + pr_err("%s: Failed to create clkgate_delay sysfs entry\n", + mmc_hostname(host)); +} #else static inline void mmc_host_clk_init(struct mmc_host *host) @@ -247,6 +287,10 @@ static inline void mmc_host_clk_exit(struct mmc_host *host) { } +static inline void mmc_host_clk_sysfs_init(struct mmc_host *host) +{ +} + #endif /** @@ -335,6 +379,7 @@ int mmc_add_host(struct mmc_host *host) #ifdef CONFIG_DEBUG_FS mmc_add_host_debugfs(host); #endif + mmc_host_clk_sysfs_init(host); mmc_start_host(host); register_pm_notifier(&host->pm_notify); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index d240427c124..59b9ba52e66 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -286,6 +286,27 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) } card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE]; switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) { + case EXT_CSD_CARD_TYPE_SDR_ALL: + case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V: + case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V: + case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52: + card->ext_csd.hs_max_dtr = 200000000; + card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_200; + break; + case EXT_CSD_CARD_TYPE_SDR_1_2V_ALL: + case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V: + case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V: + case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52: + card->ext_csd.hs_max_dtr = 200000000; + card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_2V; + break; + case EXT_CSD_CARD_TYPE_SDR_1_8V_ALL: + case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V: + case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V: + case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52: + card->ext_csd.hs_max_dtr = 200000000; + card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_8V; + break; case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 | EXT_CSD_CARD_TYPE_26: card->ext_csd.hs_max_dtr = 52000000; @@ -348,7 +369,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) part_size = ext_csd[EXT_CSD_BOOT_MULT] << 17; mmc_part_add(card, part_size, EXT_CSD_PART_CONFIG_ACC_BOOT0 + idx, - "boot%d", idx, true); + "boot%d", idx, true, + MMC_BLK_DATA_AREA_BOOT); } } } @@ -435,7 +457,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) hc_wp_grp_sz); mmc_part_add(card, part_size << 19, EXT_CSD_PART_CONFIG_ACC_GP0 + idx, - "gp%d", idx, false); + "gp%d", idx, false, + MMC_BLK_DATA_AREA_GP); } } card->ext_csd.sec_trim_mult = @@ -446,6 +469,14 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT]; card->ext_csd.trim_timeout = 300 * ext_csd[EXT_CSD_TRIM_MULT]; + + /* + * Note that the call to mmc_part_add above defaults to read + * only. If this default assumption is changed, the call must + * take into account the value of boot_locked below. + */ + card->ext_csd.boot_ro_lock = ext_csd[EXT_CSD_BOOT_WP]; + card->ext_csd.boot_ro_lockable = true; } if (card->ext_csd.rev >= 5) { @@ -690,6 +721,79 @@ static int mmc_select_powerclass(struct mmc_card *card, } /* + * Selects the desired buswidth and switch to the HS200 mode + * if bus width set without error + */ +static int mmc_select_hs200(struct mmc_card *card) +{ + int idx, err = 0; + struct mmc_host *host; + static unsigned ext_csd_bits[] = { + EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_8, + }; + static unsigned bus_widths[] = { + MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_8, + }; + + BUG_ON(!card); + + host = card->host; + + if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V && + host->caps2 & MMC_CAP2_HS200_1_2V_SDR) + if (mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120, 0)) + err = mmc_set_signal_voltage(host, + MMC_SIGNAL_VOLTAGE_180, 0); + + /* If fails try again during next card power cycle */ + if (err) + goto err; + + idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0; + + /* + * Unlike SD, MMC cards dont have a configuration register to notify + * supported bus width. So bus test command should be run to identify + * the supported bus width or compare the ext csd values of current + * bus width and ext csd values of 1 bit mode read earlier. + */ + for (; idx >= 0; idx--) { + + /* + * Host is capable of 8bit transfer, then switch + * the device to work in 8bit transfer mode. If the + * mmc switch command returns error then switch to + * 4bit transfer mode. On success set the corresponding + * bus width on the host. + */ + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_BUS_WIDTH, + ext_csd_bits[idx], + card->ext_csd.generic_cmd6_time); + if (err) + continue; + + mmc_set_bus_width(card->host, bus_widths[idx]); + + if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST)) + err = mmc_compare_ext_csds(card, bus_widths[idx]); + else + err = mmc_bus_test(card, bus_widths[idx]); + if (!err) + break; + } + + /* switch to HS200 mode if bus width set successfully */ + if (!err) + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, 2, 0); +err: + return err; +} + +/* * Handle the detection and initialisation of a card. * * In the case of a resume, "oldcard" will contain the card @@ -895,11 +999,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, /* * Activate high speed (if supported) */ - if ((card->ext_csd.hs_max_dtr != 0) && - (host->caps & MMC_CAP_MMC_HIGHSPEED)) { - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 1, - card->ext_csd.generic_cmd6_time); + if (card->ext_csd.hs_max_dtr != 0) { + err = 0; + if (card->ext_csd.hs_max_dtr > 52000000 && + host->caps2 & MMC_CAP2_HS200) + err = mmc_select_hs200(card); + else if (host->caps & MMC_CAP_MMC_HIGHSPEED) + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, 1, 0); + if (err && err != -EBADMSG) goto free_card; @@ -908,8 +1016,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, mmc_hostname(card->host)); err = 0; } else { - mmc_card_set_highspeed(card); - mmc_set_timing(card->host, MMC_TIMING_MMC_HS); + if (card->ext_csd.hs_max_dtr > 52000000 && + host->caps2 & MMC_CAP2_HS200) { + mmc_card_set_hs200(card); + mmc_set_timing(card->host, + MMC_TIMING_MMC_HS200); + } else { + mmc_card_set_highspeed(card); + mmc_set_timing(card->host, MMC_TIMING_MMC_HS); + } } } @@ -934,7 +1049,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ max_dtr = (unsigned int)-1; - if (mmc_card_highspeed(card)) { + if (mmc_card_highspeed(card) || mmc_card_hs200(card)) { if (max_dtr > card->ext_csd.hs_max_dtr) max_dtr = card->ext_csd.hs_max_dtr; } else if (max_dtr > card->csd.max_dtr) { @@ -960,9 +1075,48 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } /* + * Indicate HS200 SDR mode (if supported). + */ + if (mmc_card_hs200(card)) { + u32 ext_csd_bits; + u32 bus_width = card->host->ios.bus_width; + + /* + * For devices supporting HS200 mode, the bus width has + * to be set before executing the tuning function. If + * set before tuning, then device will respond with CRC + * errors for responses on CMD line. So for HS200 the + * sequence will be + * 1. set bus width 4bit / 8 bit (1 bit not supported) + * 2. switch to HS200 mode + * 3. set the clock to > 52Mhz <=200MHz and + * 4. execute tuning for HS200 + */ + if ((host->caps2 & MMC_CAP2_HS200) && + card->host->ops->execute_tuning) + err = card->host->ops->execute_tuning(card->host, + MMC_SEND_TUNING_BLOCK_HS200); + if (err) { + pr_warning("%s: tuning execution failed\n", + mmc_hostname(card->host)); + goto err; + } + + ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ? + EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4; + err = mmc_select_powerclass(card, ext_csd_bits, ext_csd); + if (err) { + pr_err("%s: power class selection to bus width %d failed\n", + mmc_hostname(card->host), 1 << bus_width); + goto err; + } + } + + /* * Activate wide bus and DDR (if supported). */ - if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) && + if (!mmc_card_hs200(card) && + (card->csd.mmca_vsn >= CSD_SPEC_VER_3) && (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) { static unsigned ext_csd_bits[][2] = { { EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 }, @@ -1048,7 +1202,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, * * WARNING: eMMC rules are NOT the same as SD DDR */ - if (ddr == EXT_CSD_CARD_TYPE_DDR_1_2V) { + if (ddr == MMC_1_2V_DDR_MODE) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120, 0); if (err) @@ -1067,14 +1221,23 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if ((host->caps2 & MMC_CAP2_CACHE_CTRL) && card->ext_csd.cache_size > 0) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_CACHE_CTRL, 1, 0); + EXT_CSD_CACHE_CTRL, 1, + card->ext_csd.generic_cmd6_time); if (err && err != -EBADMSG) goto free_card; /* * Only if no error, cache is turned on successfully. */ - card->ext_csd.cache_ctrl = err ? 0 : 1; + if (err) { + pr_warning("%s: Cache is supported, " + "but failed to turn on (%d)\n", + mmc_hostname(card->host), err); + card->ext_csd.cache_ctrl = 0; + err = 0; + } else { + card->ext_csd.cache_ctrl = 1; + } } if (!oldcard) @@ -1105,6 +1268,14 @@ static void mmc_remove(struct mmc_host *host) } /* + * Card detection - card is alive. + */ +static int mmc_alive(struct mmc_host *host) +{ + return mmc_send_status(host->card, NULL); +} + +/* * Card detection callback from host. */ static void mmc_detect(struct mmc_host *host) @@ -1119,7 +1290,7 @@ static void mmc_detect(struct mmc_host *host) /* * Just check if our card has been removed. */ - err = mmc_send_status(host->card, NULL); + err = _mmc_detect_card_removed(host); mmc_release_host(host); @@ -1224,6 +1395,7 @@ static const struct mmc_bus_ops mmc_ops = { .suspend = NULL, .resume = NULL, .power_restore = mmc_power_restore, + .alive = mmc_alive, }; static const struct mmc_bus_ops mmc_ops_unsafe = { @@ -1234,6 +1406,7 @@ static const struct mmc_bus_ops mmc_ops_unsafe = { .suspend = mmc_suspend, .resume = mmc_resume, .power_restore = mmc_power_restore, + .alive = mmc_alive, }; static void mmc_attach_bus_ops(struct mmc_host *host) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index f2a05ea40f2..c63ad03c29c 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -307,8 +307,8 @@ static int mmc_read_switch(struct mmc_card *card) goto out; } - if (status[13] & UHS_SDR50_BUS_SPEED) - card->sw_caps.hs_max_dtr = 50000000; + if (status[13] & SD_MODE_HIGH_SPEED) + card->sw_caps.hs_max_dtr = HIGH_SPEED_MAX_DTR; if (card->scr.sda_spec3) { card->sw_caps.sd3_bus_mode = status[13]; @@ -661,7 +661,8 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* SPI mode doesn't define CMD19 */ if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) - err = card->host->ops->execute_tuning(card->host); + err = card->host->ops->execute_tuning(card->host, + MMC_SEND_TUNING_BLOCK); out: kfree(status); @@ -960,7 +961,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, goto free_card; /* Card is an ultra-high-speed card */ - mmc_sd_card_set_uhs(card); + mmc_card_set_uhs(card); /* * Since initialization is now complete, enable preset @@ -1019,6 +1020,14 @@ static void mmc_sd_remove(struct mmc_host *host) } /* + * Card detection - card is alive. + */ +static int mmc_sd_alive(struct mmc_host *host) +{ + return mmc_send_status(host->card, NULL); +} + +/* * Card detection callback from host. */ static void mmc_sd_detect(struct mmc_host *host) @@ -1033,7 +1042,7 @@ static void mmc_sd_detect(struct mmc_host *host) /* * Just check if our card has been removed. */ - err = mmc_send_status(host->card, NULL); + err = _mmc_detect_card_removed(host); mmc_release_host(host); @@ -1102,6 +1111,7 @@ static const struct mmc_bus_ops mmc_sd_ops = { .suspend = NULL, .resume = NULL, .power_restore = mmc_sd_power_restore, + .alive = mmc_sd_alive, }; static const struct mmc_bus_ops mmc_sd_ops_unsafe = { @@ -1110,6 +1120,7 @@ static const struct mmc_bus_ops mmc_sd_ops_unsafe = { .suspend = mmc_sd_suspend, .resume = mmc_sd_resume, .power_restore = mmc_sd_power_restore, + .alive = mmc_sd_alive, }; static void mmc_sd_attach_bus_ops(struct mmc_host *host) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 3ab565e32a6..bd7bacc950d 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -14,6 +14,7 @@ #include <linux/mmc/host.h> #include <linux/mmc/card.h> +#include <linux/mmc/mmc.h> #include <linux/mmc/sdio.h> #include <linux/mmc/sdio_func.h> #include <linux/mmc/sdio_ids.h> @@ -102,6 +103,7 @@ static int sdio_read_cccr(struct mmc_card *card) int ret; int cccr_vsn; unsigned char data; + unsigned char speed; memset(&card->cccr, 0, sizeof(struct sdio_cccr)); @@ -140,12 +142,60 @@ static int sdio_read_cccr(struct mmc_card *card) } if (cccr_vsn >= SDIO_CCCR_REV_1_20) { - ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &data); + ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed); if (ret) goto out; - if (data & SDIO_SPEED_SHS) - card->cccr.high_speed = 1; + card->scr.sda_spec3 = 0; + card->sw_caps.sd3_bus_mode = 0; + card->sw_caps.sd3_drv_type = 0; + if (cccr_vsn >= SDIO_CCCR_REV_3_00) { + card->scr.sda_spec3 = 1; + ret = mmc_io_rw_direct(card, 0, 0, + SDIO_CCCR_UHS, 0, &data); + if (ret) + goto out; + + if (card->host->caps & + (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_DDR50)) { + if (data & SDIO_UHS_DDR50) + card->sw_caps.sd3_bus_mode + |= SD_MODE_UHS_DDR50; + + if (data & SDIO_UHS_SDR50) + card->sw_caps.sd3_bus_mode + |= SD_MODE_UHS_SDR50; + + if (data & SDIO_UHS_SDR104) + card->sw_caps.sd3_bus_mode + |= SD_MODE_UHS_SDR104; + } + + ret = mmc_io_rw_direct(card, 0, 0, + SDIO_CCCR_DRIVE_STRENGTH, 0, &data); + if (ret) + goto out; + + if (data & SDIO_DRIVE_SDTA) + card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_A; + if (data & SDIO_DRIVE_SDTC) + card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_C; + if (data & SDIO_DRIVE_SDTD) + card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_D; + } + + /* if no uhs mode ensure we check for high speed */ + if (!card->sw_caps.sd3_bus_mode) { + if (speed & SDIO_SPEED_SHS) { + card->cccr.high_speed = 1; + card->sw_caps.hs_max_dtr = 50000000; + } else { + card->cccr.high_speed = 0; + card->sw_caps.hs_max_dtr = 25000000; + } + } } out: @@ -327,6 +377,194 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card) return max_dtr; } +static unsigned char host_drive_to_sdio_drive(int host_strength) +{ + switch (host_strength) { + case MMC_SET_DRIVER_TYPE_A: + return SDIO_DTSx_SET_TYPE_A; + case MMC_SET_DRIVER_TYPE_B: + return SDIO_DTSx_SET_TYPE_B; + case MMC_SET_DRIVER_TYPE_C: + return SDIO_DTSx_SET_TYPE_C; + case MMC_SET_DRIVER_TYPE_D: + return SDIO_DTSx_SET_TYPE_D; + default: + return SDIO_DTSx_SET_TYPE_B; + } +} + +static void sdio_select_driver_type(struct mmc_card *card) +{ + int host_drv_type = SD_DRIVER_TYPE_B; + int card_drv_type = SD_DRIVER_TYPE_B; + int drive_strength; + unsigned char card_strength; + int err; + + /* + * If the host doesn't support any of the Driver Types A,C or D, + * or there is no board specific handler then default Driver + * Type B is used. + */ + if (!(card->host->caps & + (MMC_CAP_DRIVER_TYPE_A | + MMC_CAP_DRIVER_TYPE_C | + MMC_CAP_DRIVER_TYPE_D))) + return; + + if (!card->host->ops->select_drive_strength) + return; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) + host_drv_type |= SD_DRIVER_TYPE_A; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) + host_drv_type |= SD_DRIVER_TYPE_C; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_D) + host_drv_type |= SD_DRIVER_TYPE_D; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) + card_drv_type |= SD_DRIVER_TYPE_A; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type |= SD_DRIVER_TYPE_C; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D) + card_drv_type |= SD_DRIVER_TYPE_D; + + /* + * The drive strength that the hardware can support + * depends on the board design. Pass the appropriate + * information and let the hardware specific code + * return what is possible given the options + */ + drive_strength = card->host->ops->select_drive_strength( + card->sw_caps.uhs_max_dtr, + host_drv_type, card_drv_type); + + /* if error just use default for drive strength B */ + err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_DRIVE_STRENGTH, 0, + &card_strength); + if (err) + return; + + card_strength &= ~(SDIO_DRIVE_DTSx_MASK<<SDIO_DRIVE_DTSx_SHIFT); + card_strength |= host_drive_to_sdio_drive(drive_strength); + + err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_DRIVE_STRENGTH, + card_strength, NULL); + + /* if error default to drive strength B */ + if (!err) + mmc_set_driver_type(card->host, drive_strength); +} + + +static int sdio_set_bus_speed_mode(struct mmc_card *card) +{ + unsigned int bus_speed, timing; + int err; + unsigned char speed; + + /* + * If the host doesn't support any of the UHS-I modes, fallback on + * default speed. + */ + if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) + return 0; + + bus_speed = SDIO_SPEED_SDR12; + timing = MMC_TIMING_UHS_SDR12; + if ((card->host->caps & MMC_CAP_UHS_SDR104) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) { + bus_speed = SDIO_SPEED_SDR104; + timing = MMC_TIMING_UHS_SDR104; + card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR; + } else if ((card->host->caps & MMC_CAP_UHS_DDR50) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) { + bus_speed = SDIO_SPEED_DDR50; + timing = MMC_TIMING_UHS_DDR50; + card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR50)) { + bus_speed = SDIO_SPEED_SDR50; + timing = MMC_TIMING_UHS_SDR50; + card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) { + bus_speed = SDIO_SPEED_SDR25; + timing = MMC_TIMING_UHS_SDR25; + card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR12)) { + bus_speed = SDIO_SPEED_SDR12; + timing = MMC_TIMING_UHS_SDR12; + card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR; + } + + err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed); + if (err) + return err; + + speed &= ~SDIO_SPEED_BSS_MASK; + speed |= bus_speed; + err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL); + if (err) + return err; + + if (bus_speed) { + mmc_set_timing(card->host, timing); + mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr); + } + + return 0; +} + +/* + * UHS-I specific initialization procedure + */ +static int mmc_sdio_init_uhs_card(struct mmc_card *card) +{ + int err; + + if (!card->scr.sda_spec3) + return 0; + + /* + * Switch to wider bus (if supported). + */ + if (card->host->caps & MMC_CAP_4_BIT_DATA) { + err = sdio_enable_4bit_bus(card); + if (err > 0) { + mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); + err = 0; + } + } + + /* Set the driver strength for the card */ + sdio_select_driver_type(card); + + /* Set bus speed mode of the card */ + err = sdio_set_bus_speed_mode(card); + if (err) + goto out; + + /* Initialize and start re-tuning timer */ + if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) + err = card->host->ops->execute_tuning(card->host, + MMC_SEND_TUNING_BLOCK); + +out: + + return err; +} + /* * Handle the detection and initialisation of a card. * @@ -394,6 +632,30 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, host->ops->init_card(host, card); /* + * If the host and card support UHS-I mode request the card + * to switch to 1.8V signaling level. No 1.8v signalling if + * UHS mode is not enabled to maintain compatibilty and some + * systems that claim 1.8v signalling in fact do not support + * it. + */ + if ((ocr & R4_18V_PRESENT) && + (host->caps & + (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_DDR50))) { + err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180, + true); + if (err) { + ocr &= ~R4_18V_PRESENT; + host->ocr &= ~R4_18V_PRESENT; + } + err = 0; + } else { + ocr &= ~R4_18V_PRESENT; + host->ocr &= ~R4_18V_PRESENT; + } + + /* * For native busses: set card RCA and quit open drain mode. */ if (!powered_resume && !mmc_host_is_spi(host)) { @@ -492,29 +754,39 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, if (err) goto remove; - /* - * Switch to high-speed (if supported). - */ - err = sdio_enable_hs(card); - if (err > 0) - mmc_sd_go_highspeed(card); - else if (err) - goto remove; + /* Initialization sequence for UHS-I cards */ + /* Only if card supports 1.8v and UHS signaling */ + if ((ocr & R4_18V_PRESENT) && card->sw_caps.sd3_bus_mode) { + err = mmc_sdio_init_uhs_card(card); + if (err) + goto remove; - /* - * Change to the card's maximum speed. - */ - mmc_set_clock(host, mmc_sdio_get_max_clock(card)); + /* Card is an ultra-high-speed card */ + mmc_card_set_uhs(card); + } else { + /* + * Switch to high-speed (if supported). + */ + err = sdio_enable_hs(card); + if (err > 0) + mmc_sd_go_highspeed(card); + else if (err) + goto remove; - /* - * Switch to wider bus (if supported). - */ - err = sdio_enable_4bit_bus(card); - if (err > 0) - mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); - else if (err) - goto remove; + /* + * Change to the card's maximum speed. + */ + mmc_set_clock(host, mmc_sdio_get_max_clock(card)); + /* + * Switch to wider bus (if supported). + */ + err = sdio_enable_4bit_bus(card); + if (err > 0) + mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); + else if (err) + goto remove; + } finish: if (!oldcard) host->card = card; @@ -550,6 +822,14 @@ static void mmc_sdio_remove(struct mmc_host *host) } /* + * Card detection - card is alive. + */ +static int mmc_sdio_alive(struct mmc_host *host) +{ + return mmc_select_card(host->card); +} + +/* * Card detection callback from host. */ static void mmc_sdio_detect(struct mmc_host *host) @@ -571,7 +851,7 @@ static void mmc_sdio_detect(struct mmc_host *host) /* * Just check if our card has been removed. */ - err = mmc_select_card(host->card); + err = _mmc_detect_card_removed(host); mmc_release_host(host); @@ -749,6 +1029,7 @@ static const struct mmc_bus_ops mmc_sdio_ops = { .suspend = mmc_sdio_suspend, .resume = mmc_sdio_resume, .power_restore = mmc_sdio_power_restore, + .alive = mmc_sdio_alive, }; @@ -797,8 +1078,17 @@ int mmc_attach_sdio(struct mmc_host *host) * Detect and init the card. */ err = mmc_sdio_init_card(host, host->ocr, NULL, 0); - if (err) - goto err; + if (err) { + if (err == -EAGAIN) { + /* + * Retry initialization with S18R set to 0. + */ + host->ocr &= ~R4_18V_PRESENT; + err = mmc_sdio_init_card(host, host->ocr, NULL, 0); + } + if (err) + goto err; + } card = host->card; /* diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index b1f3168f791..8f6f5ac131f 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -196,6 +196,9 @@ static inline unsigned int sdio_max_byte_size(struct sdio_func *func) else mval = min(mval, func->max_blksize); + if (mmc_card_broken_byte_mode_512(func->card)) + return min(mval, 511u); + return min(mval, 512u); /* maximum size for byte mode */ } @@ -314,7 +317,7 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, func->card->host->max_seg_size / func->cur_blksize); max_blocks = min(max_blocks, 511u); - while (remainder > func->cur_blksize) { + while (remainder >= func->cur_blksize) { unsigned blocks; blocks = remainder / func->cur_blksize; @@ -339,8 +342,9 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, while (remainder > 0) { size = min(remainder, sdio_max_byte_size(func)); + /* Indicate byte mode by setting "blocks" = 0 */ ret = mmc_io_rw_extended(func->card, write, func->num, addr, - incr_addr, buf, 1, size); + incr_addr, buf, 0, size); if (ret) return ret; diff --git a/drivers/mmc/core/sdio_ops.c b/drivers/mmc/core/sdio_ops.c index b0517cc0620..d29e20630ee 100644 --- a/drivers/mmc/core/sdio_ops.c +++ b/drivers/mmc/core/sdio_ops.c @@ -128,8 +128,6 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn, BUG_ON(!card); BUG_ON(fn > 7); - BUG_ON(blocks == 1 && blksz > 512); - WARN_ON(blocks == 0); WARN_ON(blksz == 0); /* sanity check */ @@ -144,22 +142,20 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn, cmd.arg |= fn << 28; cmd.arg |= incr_addr ? 0x04000000 : 0x00000000; cmd.arg |= addr << 9; - if (blocks == 1 && blksz < 512) - cmd.arg |= blksz; /* byte mode */ - else if (blocks == 1 && blksz == 512 && - !(mmc_card_broken_byte_mode_512(card))) - cmd.arg |= 0; /* byte mode, 0==512 */ + if (blocks == 0) + cmd.arg |= (blksz == 512) ? 0 : blksz; /* byte mode */ else cmd.arg |= 0x08000000 | blocks; /* block mode */ cmd.flags = MMC_RSP_SPI_R5 | MMC_RSP_R5 | MMC_CMD_ADTC; data.blksz = blksz; - data.blocks = blocks; + /* Code in host drivers/fwk assumes that "blocks" always is >=1 */ + data.blocks = blocks ? blocks : 1; data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ; data.sg = &sg; data.sg_len = 1; - sg_init_one(&sg, buf, blksz * blocks); + sg_init_one(&sg, buf, data.blksz * data.blocks); mmc_set_data_timeout(&data, card); diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index b4b83f302e3..745f8fce251 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_MMC_MXC) += mxcmmc.o obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o +obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += sdhci-pci-data.o obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o obj-$(CONFIG_MMC_SDHCI_PXAV2) += sdhci-pxav2.o obj-$(CONFIG_MMC_SDHCI_S3C) += sdhci-s3c.o diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index f437c3e6f3a..947faa5d2ce 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -236,7 +236,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data sg = &data->sg[i]; - sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; + sgbuffer = kmap_atomic(sg_page(sg)) + sg->offset; amount = min(size, sg->length); size -= amount; @@ -252,7 +252,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data dmabuf = (unsigned *)tmpv; } - kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ); + kunmap_atomic(sgbuffer); if (size == 0) break; @@ -302,7 +302,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) sg = &data->sg[i]; - sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; + sgbuffer = kmap_atomic(sg_page(sg)) + sg->offset; amount = min(size, sg->length); size -= amount; @@ -318,7 +318,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) } flush_kernel_dcache_page(sg_page(sg)); - kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ); + kunmap_atomic(sgbuffer); data->bytes_xfered += amount; if (size == 0) break; diff --git a/drivers/mmc/host/bfin_sdh.c b/drivers/mmc/host/bfin_sdh.c index 0371bf50224..03666174ca4 100644 --- a/drivers/mmc/host/bfin_sdh.c +++ b/drivers/mmc/host/bfin_sdh.c @@ -627,17 +627,7 @@ static struct platform_driver sdh_driver = { }, }; -static int __init sdh_init(void) -{ - return platform_driver_register(&sdh_driver); -} -module_init(sdh_init); - -static void __exit sdh_exit(void) -{ - platform_driver_unregister(&sdh_driver); -} -module_exit(sdh_exit); +module_platform_driver(sdh_driver); MODULE_DESCRIPTION("Blackfin Secure Digital Host Driver"); MODULE_AUTHOR("Cliff Cai, Roy Huang"); diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c index ce2a47b71dd..83693fd7c6b 100644 --- a/drivers/mmc/host/cb710-mmc.c +++ b/drivers/mmc/host/cb710-mmc.c @@ -780,18 +780,7 @@ static struct platform_driver cb710_mmc_driver = { #endif }; -static int __init cb710_mmc_init_module(void) -{ - return platform_driver_register(&cb710_mmc_driver); -} - -static void __exit cb710_mmc_cleanup_module(void) -{ - platform_driver_unregister(&cb710_mmc_driver); -} - -module_init(cb710_mmc_init_module); -module_exit(cb710_mmc_cleanup_module); +module_platform_driver(cb710_mmc_driver); MODULE_AUTHOR("MichaÅ‚ MirosÅ‚aw <mirq-linux@rere.qmqm.pl>"); MODULE_DESCRIPTION("ENE CB710 memory card reader driver - MMC/SD part"); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 3aaeb084191..0e342793ff1 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -588,11 +588,11 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot) mci_writel(host, CTYPE, (slot->ctype << slot->id)); } -static void dw_mci_start_request(struct dw_mci *host, - struct dw_mci_slot *slot) +static void __dw_mci_start_request(struct dw_mci *host, + struct dw_mci_slot *slot, + struct mmc_command *cmd) { struct mmc_request *mrq; - struct mmc_command *cmd; struct mmc_data *data; u32 cmdflags; @@ -610,14 +610,13 @@ static void dw_mci_start_request(struct dw_mci *host, host->completed_events = 0; host->data_status = 0; - data = mrq->data; + data = cmd->data; if (data) { dw_mci_set_timeout(host); mci_writel(host, BYTCNT, data->blksz*data->blocks); mci_writel(host, BLKSIZ, data->blksz); } - cmd = mrq->cmd; cmdflags = dw_mci_prepare_command(slot->mmc, cmd); /* this is the first command, send the initialization clock */ @@ -635,6 +634,16 @@ static void dw_mci_start_request(struct dw_mci *host, host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop); } +static void dw_mci_start_request(struct dw_mci *host, + struct dw_mci_slot *slot) +{ + struct mmc_request *mrq = slot->mrq; + struct mmc_command *cmd; + + cmd = mrq->sbc ? mrq->sbc : mrq->cmd; + __dw_mci_start_request(host, slot, cmd); +} + /* must be called with host->lock held */ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, struct mmc_request *mrq) @@ -698,12 +707,15 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) break; } + regs = mci_readl(slot->host, UHS_REG); + /* DDR mode set */ - if (ios->timing == MMC_TIMING_UHS_DDR50) { - regs = mci_readl(slot->host, UHS_REG); + if (ios->timing == MMC_TIMING_UHS_DDR50) regs |= (0x1 << slot->id) << 16; - mci_writel(slot->host, UHS_REG, regs); - } + else + regs &= ~(0x1 << slot->id) << 16; + + mci_writel(slot->host, UHS_REG, regs); if (ios->clock) { /* @@ -889,7 +901,14 @@ static void dw_mci_tasklet_func(unsigned long priv) cmd = host->cmd; host->cmd = NULL; set_bit(EVENT_CMD_COMPLETE, &host->completed_events); - dw_mci_command_complete(host, host->mrq->cmd); + dw_mci_command_complete(host, cmd); + if (cmd == host->mrq->sbc && !cmd->error) { + prev_state = state = STATE_SENDING_CMD; + __dw_mci_start_request(host, host->cur_slot, + host->mrq->cmd); + goto unlock; + } + if (!host->mrq->data || cmd->error) { dw_mci_request_end(host, host->mrq); goto unlock; @@ -967,6 +986,12 @@ static void dw_mci_tasklet_func(unsigned long priv) goto unlock; } + if (host->mrq->sbc && !data->error) { + data->stop->error = 0; + dw_mci_request_end(host, host->mrq); + goto unlock; + } + prev_state = state = STATE_SENDING_STOP; if (!data->error) send_stop_cmd(host, data); @@ -1678,8 +1703,9 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id) if (host->pdata->caps) mmc->caps = host->pdata->caps; - else - mmc->caps = 0; + + if (host->pdata->caps2) + mmc->caps2 = host->pdata->caps2; if (host->pdata->get_bus_wd) if (host->pdata->get_bus_wd(slot->id) >= 4) @@ -1923,7 +1949,7 @@ static int dw_mci_probe(struct platform_device *pdev) * should put it in the platform data. */ fifo_size = mci_readl(host, FIFOTH); - fifo_size = 1 + ((fifo_size >> 16) & 0x7ff); + fifo_size = 1 + ((fifo_size >> 16) & 0xfff); } else { fifo_size = host->pdata->fifo_depth; } @@ -2062,14 +2088,14 @@ static int __exit dw_mci_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP /* * TODO: we should probably disable the clock to the card in the suspend path. */ -static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg) +static int dw_mci_suspend(struct device *dev) { int i, ret; - struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci *host = dev_get_drvdata(dev); for (i = 0; i < host->num_slots; i++) { struct dw_mci_slot *slot = host->slot[i]; @@ -2092,10 +2118,10 @@ static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg) return 0; } -static int dw_mci_resume(struct platform_device *pdev) +static int dw_mci_resume(struct device *dev) { int i, ret; - struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci *host = dev_get_drvdata(dev); if (host->vmmc) regulator_enable(host->vmmc); @@ -2103,7 +2129,7 @@ static int dw_mci_resume(struct platform_device *pdev) if (host->dma_ops->init) host->dma_ops->init(host); - if (!mci_wait_reset(&pdev->dev, host)) { + if (!mci_wait_reset(dev, host)) { ret = -ENODEV; return ret; } @@ -2131,14 +2157,15 @@ static int dw_mci_resume(struct platform_device *pdev) #else #define dw_mci_suspend NULL #define dw_mci_resume NULL -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(dw_mci_pmops, dw_mci_suspend, dw_mci_resume); static struct platform_driver dw_mci_driver = { .remove = __exit_p(dw_mci_remove), - .suspend = dw_mci_suspend, - .resume = dw_mci_resume, .driver = { .name = "dw_mmc", + .pm = &dw_mci_pmops, }, }; diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 72c071f6e00..df392a1143f 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -126,7 +126,7 @@ #define SDMMC_CMD_RESP_EXP BIT(6) #define SDMMC_CMD_INDX(n) ((n) & 0x1F) /* Status register defines */ -#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FF) +#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) /* Internal DMAC interrupt defines */ #define SDMMC_IDMAC_INT_AI BIT(9) #define SDMMC_IDMAC_INT_NI BIT(8) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 74218ad677e..c8852a8128a 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1012,17 +1012,7 @@ static struct platform_driver jz4740_mmc_driver = { }, }; -static int __init jz4740_mmc_init(void) -{ - return platform_driver_register(&jz4740_mmc_driver); -} -module_init(jz4740_mmc_init); - -static void __exit jz4740_mmc_exit(void) -{ - platform_driver_unregister(&jz4740_mmc_driver); -} -module_exit(jz4740_mmc_exit); +module_platform_driver(jz4740_mmc_driver); MODULE_DESCRIPTION("JZ4740 SD/MMC controller driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 92946b84e9f..273306c68d5 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1525,7 +1525,6 @@ static struct of_device_id mmc_spi_of_match_table[] __devinitdata = { static struct spi_driver mmc_spi_driver = { .driver = { .name = "mmc_spi", - .bus = &spi_bus_type, .owner = THIS_MODULE, .of_match_table = mmc_spi_of_match_table, }, diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fa8dd2fda4b..ece03b491c7 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1245,6 +1245,7 @@ static int __devinit mmci_probe(struct amba_device *dev, if (host->vcc == NULL) mmc->ocr_avail = plat->ocr_mask; mmc->caps = plat->capabilities; + mmc->caps2 = plat->capabilities2; /* * We can do SGIO diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index 80d8eb143b4..1d14cda95e5 100644 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -689,8 +689,8 @@ msmsdcc_pio_irq(int irq, void *dev_id) /* Map the current scatter buffer */ local_irq_save(flags); - buffer = kmap_atomic(sg_page(host->pio.sg), - KM_BIO_SRC_IRQ) + host->pio.sg->offset; + buffer = kmap_atomic(sg_page(host->pio.sg)) + + host->pio.sg->offset; buffer += host->pio.sg_off; remain = host->pio.sg->length - host->pio.sg_off; len = 0; @@ -700,7 +700,7 @@ msmsdcc_pio_irq(int irq, void *dev_id) len = msmsdcc_pio_write(host, buffer, remain, status); /* Unmap the buffer */ - kunmap_atomic(buffer, KM_BIO_SRC_IRQ); + kunmap_atomic(buffer); local_irq_restore(flags); host->pio.sg_off += len; @@ -1480,18 +1480,7 @@ static struct platform_driver msmsdcc_driver = { }, }; -static int __init msmsdcc_init(void) -{ - return platform_driver_register(&msmsdcc_driver); -} - -static void __exit msmsdcc_exit(void) -{ - platform_driver_unregister(&msmsdcc_driver); -} - -module_init(msmsdcc_init); -module_exit(msmsdcc_exit); +module_platform_driver(msmsdcc_driver); MODULE_DESCRIPTION("Qualcomm MSM 7X00A Multimedia Card Interface driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 8e0fbe99404..7088b40f957 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1047,18 +1047,7 @@ static struct platform_driver mxcmci_driver = { } }; -static int __init mxcmci_init(void) -{ - return platform_driver_register(&mxcmci_driver); -} - -static void __exit mxcmci_exit(void) -{ - platform_driver_unregister(&mxcmci_driver); -} - -module_init(mxcmci_init); -module_exit(mxcmci_exit); +module_platform_driver(mxcmci_driver); MODULE_DESCRIPTION("i.MX Multimedia Card Interface Driver"); MODULE_AUTHOR("Sascha Hauer, Pengutronix"); diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 973011f9a29..4e2e019dd5c 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -855,18 +855,7 @@ static struct platform_driver mxs_mmc_driver = { }, }; -static int __init mxs_mmc_init(void) -{ - return platform_driver_register(&mxs_mmc_driver); -} - -static void __exit mxs_mmc_exit(void) -{ - platform_driver_unregister(&mxs_mmc_driver); -} - -module_init(mxs_mmc_init); -module_exit(mxs_mmc_exit); +module_platform_driver(mxs_mmc_driver); MODULE_DESCRIPTION("FREESCALE MXS MMC peripheral"); MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index d1fb561e089..fd0c661bbad 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -24,7 +24,6 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/platform_device.h> -#include <linux/workqueue.h> #include <linux/timer.h> #include <linux/clk.h> #include <linux/mmc/host.h> @@ -120,7 +119,6 @@ #define MMC_AUTOSUSPEND_DELAY 100 #define MMC_TIMEOUT_MS 20 -#define OMAP_MMC_MASTER_CLOCK 96000000 #define OMAP_MMC_MIN_CLOCK 400000 #define OMAP_MMC_MAX_CLOCK 52000000 #define DRIVER_NAME "omap_hsmmc" @@ -163,7 +161,6 @@ struct omap_hsmmc_host { */ struct regulator *vcc; struct regulator *vcc_aux; - struct work_struct mmc_carddetect_work; void __iomem *base; resource_size_t mapbase; spinlock_t irq_lock; /* Prevent races with irq handler */ @@ -598,12 +595,12 @@ static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) } /* Calculate divisor for the given clock frequency */ -static u16 calc_divisor(struct mmc_ios *ios) +static u16 calc_divisor(struct omap_hsmmc_host *host, struct mmc_ios *ios) { u16 dsor = 0; if (ios->clock) { - dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock); + dsor = DIV_ROUND_UP(clk_get_rate(host->fclk), ios->clock); if (dsor > 250) dsor = 250; } @@ -623,7 +620,7 @@ static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host) regval = OMAP_HSMMC_READ(host->base, SYSCTL); regval = regval & ~(CLKD_MASK | DTO_MASK); - regval = regval | (calc_divisor(ios) << 6) | (DTO << 16); + regval = regval | (calc_divisor(host, ios) << 6) | (DTO << 16); OMAP_HSMMC_WRITE(host->base, SYSCTL, regval); OMAP_HSMMC_WRITE(host->base, SYSCTL, OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); @@ -1280,17 +1277,16 @@ static void omap_hsmmc_protect_card(struct omap_hsmmc_host *host) } /* - * Work Item to notify the core about card insertion/removal + * irq handler to notify the core about card insertion/removal */ -static void omap_hsmmc_detect(struct work_struct *work) +static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id) { - struct omap_hsmmc_host *host = - container_of(work, struct omap_hsmmc_host, mmc_carddetect_work); + struct omap_hsmmc_host *host = dev_id; struct omap_mmc_slot_data *slot = &mmc_slot(host); int carddetect; if (host->suspended) - return; + return IRQ_HANDLED; sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch"); @@ -1305,19 +1301,6 @@ static void omap_hsmmc_detect(struct work_struct *work) mmc_detect_change(host->mmc, (HZ * 200) / 1000); else mmc_detect_change(host->mmc, (HZ * 50) / 1000); -} - -/* - * ISR for handling card insertion and removal - */ -static irqreturn_t omap_hsmmc_cd_handler(int irq, void *dev_id) -{ - struct omap_hsmmc_host *host = (struct omap_hsmmc_host *)dev_id; - - if (host->suspended) - return IRQ_HANDLED; - schedule_work(&host->mmc_carddetect_work); - return IRQ_HANDLED; } @@ -1919,7 +1902,6 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) host->next_data.cookie = 1; platform_set_drvdata(pdev, host); - INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect); mmc->ops = &omap_hsmmc_ops; @@ -2049,10 +2031,11 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) /* Request IRQ for card detect */ if ((mmc_slot(host).card_detect_irq)) { - ret = request_irq(mmc_slot(host).card_detect_irq, - omap_hsmmc_cd_handler, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - mmc_hostname(mmc), host); + ret = request_threaded_irq(mmc_slot(host).card_detect_irq, + NULL, + omap_hsmmc_detect, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + mmc_hostname(mmc), host); if (ret) { dev_dbg(mmc_dev(host->mmc), "Unable to grab MMC CD IRQ\n"); @@ -2131,7 +2114,6 @@ static int omap_hsmmc_remove(struct platform_device *pdev) free_irq(host->irq, host); if (mmc_slot(host).card_detect_irq) free_irq(mmc_slot(host).card_detect_irq, host); - flush_work_sync(&host->mmc_carddetect_work); pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); @@ -2178,7 +2160,6 @@ static int omap_hsmmc_suspend(struct device *dev) return ret; } } - cancel_work_sync(&host->mmc_carddetect_work); ret = mmc_suspend_host(host->mmc); if (ret) { diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index fc4356e00d4..cb2dc0e75ba 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -872,18 +872,7 @@ static struct platform_driver pxamci_driver = { }, }; -static int __init pxamci_init(void) -{ - return platform_driver_register(&pxamci_driver); -} - -static void __exit pxamci_exit(void) -{ - platform_driver_unregister(&pxamci_driver); -} - -module_init(pxamci_init); -module_exit(pxamci_exit); +module_platform_driver(pxamci_driver); MODULE_DESCRIPTION("PXA Multimedia Card Interface Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 720f99334a7..1bcfd6dbb5c 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1914,18 +1914,7 @@ static struct platform_driver s3cmci_driver = { .shutdown = s3cmci_shutdown, }; -static int __init s3cmci_init(void) -{ - return platform_driver_register(&s3cmci_driver); -} - -static void __exit s3cmci_exit(void) -{ - platform_driver_unregister(&s3cmci_driver); -} - -module_init(s3cmci_init); -module_exit(s3cmci_exit); +module_platform_driver(s3cmci_driver); MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c index b4257e70061..28a870804f6 100644 --- a/drivers/mmc/host/sdhci-cns3xxx.c +++ b/drivers/mmc/host/sdhci-cns3xxx.c @@ -115,17 +115,7 @@ static struct platform_driver sdhci_cns3xxx_driver = { .remove = __devexit_p(sdhci_cns3xxx_remove), }; -static int __init sdhci_cns3xxx_init(void) -{ - return platform_driver_register(&sdhci_cns3xxx_driver); -} -module_init(sdhci_cns3xxx_init); - -static void __exit sdhci_cns3xxx_exit(void) -{ - platform_driver_unregister(&sdhci_cns3xxx_driver); -} -module_exit(sdhci_cns3xxx_exit); +module_platform_driver(sdhci_cns3xxx_driver); MODULE_DESCRIPTION("SDHCI driver for CNS3xxx"); MODULE_AUTHOR("Scott Shu, " diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c index a81312c91f7..46fd1fd1b60 100644 --- a/drivers/mmc/host/sdhci-dove.c +++ b/drivers/mmc/host/sdhci-dove.c @@ -88,17 +88,7 @@ static struct platform_driver sdhci_dove_driver = { .remove = __devexit_p(sdhci_dove_remove), }; -static int __init sdhci_dove_init(void) -{ - return platform_driver_register(&sdhci_dove_driver); -} -module_init(sdhci_dove_init); - -static void __exit sdhci_dove_exit(void) -{ - platform_driver_unregister(&sdhci_dove_driver); -} -module_exit(sdhci_dove_exit); +module_platform_driver(sdhci_dove_driver); MODULE_DESCRIPTION("SDHCI driver for Dove"); MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>, " diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 38ebc4ea259..d601e41af28 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -606,17 +606,7 @@ static struct platform_driver sdhci_esdhc_imx_driver = { .remove = __devexit_p(sdhci_esdhc_imx_remove), }; -static int __init sdhci_esdhc_imx_init(void) -{ - return platform_driver_register(&sdhci_esdhc_imx_driver); -} -module_init(sdhci_esdhc_imx_init); - -static void __exit sdhci_esdhc_imx_exit(void) -{ - platform_driver_unregister(&sdhci_esdhc_imx_driver); -} -module_exit(sdhci_esdhc_imx_exit); +module_platform_driver(sdhci_esdhc_imx_driver); MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC"); MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>"); diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index c3b08f11194..b97b2f5dafd 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -73,7 +73,7 @@ static inline void esdhc_set_clock(struct sdhci_host *host, unsigned int clock) | (div << ESDHC_DIVIDER_SHIFT) | (pre_div << ESDHC_PREDIV_SHIFT)); sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL); - mdelay(100); + mdelay(1); out: host->clock = clock; } diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 01e5f627e0f..ff4adc01804 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -131,17 +131,7 @@ static struct platform_driver sdhci_esdhc_driver = { .remove = __devexit_p(sdhci_esdhc_remove), }; -static int __init sdhci_esdhc_init(void) -{ - return platform_driver_register(&sdhci_esdhc_driver); -} -module_init(sdhci_esdhc_init); - -static void __exit sdhci_esdhc_exit(void) -{ - platform_driver_unregister(&sdhci_esdhc_driver); -} -module_exit(sdhci_esdhc_exit); +module_platform_driver(sdhci_esdhc_driver); MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC"); MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, " diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c index 3619adc7d9f..0ce088ae022 100644 --- a/drivers/mmc/host/sdhci-of-hlwd.c +++ b/drivers/mmc/host/sdhci-of-hlwd.c @@ -93,17 +93,7 @@ static struct platform_driver sdhci_hlwd_driver = { .remove = __devexit_p(sdhci_hlwd_remove), }; -static int __init sdhci_hlwd_init(void) -{ - return platform_driver_register(&sdhci_hlwd_driver); -} -module_init(sdhci_hlwd_init); - -static void __exit sdhci_hlwd_exit(void) -{ - platform_driver_unregister(&sdhci_hlwd_driver); -} -module_exit(sdhci_hlwd_exit); +module_platform_driver(sdhci_hlwd_driver); MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver"); MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz"); diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c new file mode 100644 index 00000000000..a611217769f --- /dev/null +++ b/drivers/mmc/host/sdhci-pci-data.c @@ -0,0 +1,5 @@ +#include <linux/module.h> +#include <linux/mmc/sdhci-pci-data.h> + +struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno); +EXPORT_SYMBOL_GPL(sdhci_pci_get_data); diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 6878a94626b..7165e6a0927 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -23,8 +23,8 @@ #include <linux/scatterlist.h> #include <linux/io.h> #include <linux/gpio.h> -#include <linux/sfi.h> #include <linux/pm_runtime.h> +#include <linux/mmc/sdhci-pci-data.h> #include "sdhci.h" @@ -61,6 +61,7 @@ struct sdhci_pci_fixes { struct sdhci_pci_slot { struct sdhci_pci_chip *chip; struct sdhci_host *host; + struct sdhci_pci_data *data; int pci_bar; int rst_n_gpio; @@ -171,32 +172,9 @@ static int mrst_hc_probe(struct sdhci_pci_chip *chip) return 0; } -/* Medfield eMMC hardware reset GPIOs */ -static int mfd_emmc0_rst_gpio = -EINVAL; -static int mfd_emmc1_rst_gpio = -EINVAL; - -static int mfd_emmc_gpio_parse(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb = (struct sfi_table_simple *)table; - struct sfi_gpio_table_entry *entry; - int i, num; - - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); - entry = (struct sfi_gpio_table_entry *)sb->pentry; - - for (i = 0; i < num; i++, entry++) { - if (!strncmp(entry->pin_name, "emmc0_rst", SFI_NAME_LEN)) - mfd_emmc0_rst_gpio = entry->pin_no; - else if (!strncmp(entry->pin_name, "emmc1_rst", SFI_NAME_LEN)) - mfd_emmc1_rst_gpio = entry->pin_no; - } - - return 0; -} - #ifdef CONFIG_PM_RUNTIME -static irqreturn_t mfd_sd_cd(int irq, void *dev_id) +static irqreturn_t sdhci_pci_sd_cd(int irq, void *dev_id) { struct sdhci_pci_slot *slot = dev_id; struct sdhci_host *host = slot->host; @@ -205,15 +183,16 @@ static irqreturn_t mfd_sd_cd(int irq, void *dev_id) return IRQ_HANDLED; } -#define MFLD_SD_CD_PIN 69 - -static int mfd_sd_probe_slot(struct sdhci_pci_slot *slot) +static void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot) { - int err, irq, gpio = MFLD_SD_CD_PIN; + int err, irq, gpio = slot->cd_gpio; slot->cd_gpio = -EINVAL; slot->cd_irq = -EINVAL; + if (!gpio_is_valid(gpio)) + return; + err = gpio_request(gpio, "sd_cd"); if (err < 0) goto out; @@ -226,72 +205,53 @@ static int mfd_sd_probe_slot(struct sdhci_pci_slot *slot) if (irq < 0) goto out_free; - err = request_irq(irq, mfd_sd_cd, IRQF_TRIGGER_RISING | + err = request_irq(irq, sdhci_pci_sd_cd, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "sd_cd", slot); if (err) goto out_free; slot->cd_gpio = gpio; slot->cd_irq = irq; - slot->host->quirks2 |= SDHCI_QUIRK2_OWN_CARD_DETECTION; - return 0; + return; out_free: gpio_free(gpio); out: dev_warn(&slot->chip->pdev->dev, "failed to setup card detect wake up\n"); - return 0; } -static void mfd_sd_remove_slot(struct sdhci_pci_slot *slot, int dead) +static void sdhci_pci_remove_own_cd(struct sdhci_pci_slot *slot) { if (slot->cd_irq >= 0) free_irq(slot->cd_irq, slot); - gpio_free(slot->cd_gpio); + if (gpio_is_valid(slot->cd_gpio)) + gpio_free(slot->cd_gpio); } #else -#define mfd_sd_probe_slot NULL -#define mfd_sd_remove_slot NULL +static inline void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot) +{ +} + +static inline void sdhci_pci_remove_own_cd(struct sdhci_pci_slot *slot) +{ +} #endif static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot) { - const char *name = NULL; - int gpio = -EINVAL; - - sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, mfd_emmc_gpio_parse); - - switch (slot->chip->pdev->device) { - case PCI_DEVICE_ID_INTEL_MFD_EMMC0: - gpio = mfd_emmc0_rst_gpio; - name = "eMMC0_reset"; - break; - case PCI_DEVICE_ID_INTEL_MFD_EMMC1: - gpio = mfd_emmc1_rst_gpio; - name = "eMMC1_reset"; - break; - } - - if (!gpio_request(gpio, name)) { - gpio_direction_output(gpio, 1); - slot->rst_n_gpio = gpio; - slot->host->mmc->caps |= MMC_CAP_HW_RESET; - } - slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE; - slot->host->mmc->caps2 = MMC_CAP2_BOOTPART_NOACC; - return 0; } -static void mfd_emmc_remove_slot(struct sdhci_pci_slot *slot, int dead) +static int mfd_sdio_probe_slot(struct sdhci_pci_slot *slot) { - gpio_free(slot->rst_n_gpio); + slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD; + return 0; } static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { @@ -307,20 +267,18 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .allow_runtime_pm = true, - .probe_slot = mfd_sd_probe_slot, - .remove_slot = mfd_sd_remove_slot, }; static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .allow_runtime_pm = true, + .probe_slot = mfd_sdio_probe_slot, }; static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .allow_runtime_pm = true, .probe_slot = mfd_emmc_probe_slot, - .remove_slot = mfd_emmc_remove_slot, }; /* O2Micro extra registers */ @@ -1012,11 +970,8 @@ static int sdhci_pci_suspend(struct device *dev) ret = sdhci_suspend_host(slot->host); - if (ret) { - for (i--; i >= 0; i--) - sdhci_resume_host(chip->slots[i]->host); - return ret; - } + if (ret) + goto err_pci_suspend; slot_pm_flags = slot->host->mmc->pm_flags; if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ) @@ -1027,11 +982,8 @@ static int sdhci_pci_suspend(struct device *dev) if (chip->fixes && chip->fixes->suspend) { ret = chip->fixes->suspend(chip); - if (ret) { - for (i = chip->num_slots - 1; i >= 0; i--) - sdhci_resume_host(chip->slots[i]->host); - return ret; - } + if (ret) + goto err_pci_suspend; } pci_save_state(pdev); @@ -1048,6 +1000,11 @@ static int sdhci_pci_suspend(struct device *dev) } return 0; + +err_pci_suspend: + while (--i >= 0) + sdhci_resume_host(chip->slots[i]->host); + return ret; } static int sdhci_pci_resume(struct device *dev) @@ -1113,23 +1070,22 @@ static int sdhci_pci_runtime_suspend(struct device *dev) ret = sdhci_runtime_suspend_host(slot->host); - if (ret) { - for (i--; i >= 0; i--) - sdhci_runtime_resume_host(chip->slots[i]->host); - return ret; - } + if (ret) + goto err_pci_runtime_suspend; } if (chip->fixes && chip->fixes->suspend) { ret = chip->fixes->suspend(chip); - if (ret) { - for (i = chip->num_slots - 1; i >= 0; i--) - sdhci_runtime_resume_host(chip->slots[i]->host); - return ret; - } + if (ret) + goto err_pci_runtime_suspend; } return 0; + +err_pci_runtime_suspend: + while (--i >= 0) + sdhci_runtime_resume_host(chip->slots[i]->host); + return ret; } static int sdhci_pci_runtime_resume(struct device *dev) @@ -1190,11 +1146,12 @@ static const struct dev_pm_ops sdhci_pci_pm_ops = { \*****************************************************************************/ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( - struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar) + struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar, + int slotno) { struct sdhci_pci_slot *slot; struct sdhci_host *host; - int ret; + int ret, bar = first_bar + slotno; if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar); @@ -1228,6 +1185,23 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( slot->host = host; slot->pci_bar = bar; slot->rst_n_gpio = -EINVAL; + slot->cd_gpio = -EINVAL; + + /* Retrieve platform data if there is any */ + if (*sdhci_pci_get_data) + slot->data = sdhci_pci_get_data(pdev, slotno); + + if (slot->data) { + if (slot->data->setup) { + ret = slot->data->setup(slot->data); + if (ret) { + dev_err(&pdev->dev, "platform setup failed\n"); + goto free; + } + } + slot->rst_n_gpio = slot->data->rst_n_gpio; + slot->cd_gpio = slot->data->cd_gpio; + } host->hw_name = "PCI"; host->ops = &sdhci_pci_ops; @@ -1238,7 +1212,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc)); if (ret) { dev_err(&pdev->dev, "cannot request region\n"); - goto free; + goto cleanup; } host->ioaddr = pci_ioremap_bar(pdev, bar); @@ -1254,15 +1228,30 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( goto unmap; } + if (gpio_is_valid(slot->rst_n_gpio)) { + if (!gpio_request(slot->rst_n_gpio, "eMMC_reset")) { + gpio_direction_output(slot->rst_n_gpio, 1); + slot->host->mmc->caps |= MMC_CAP_HW_RESET; + } else { + dev_warn(&pdev->dev, "failed to request rst_n_gpio\n"); + slot->rst_n_gpio = -EINVAL; + } + } + host->mmc->pm_caps = MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ; ret = sdhci_add_host(host); if (ret) goto remove; + sdhci_pci_add_own_cd(slot); + return slot; remove: + if (gpio_is_valid(slot->rst_n_gpio)) + gpio_free(slot->rst_n_gpio); + if (chip->fixes && chip->fixes->remove_slot) chip->fixes->remove_slot(slot, 0); @@ -1272,6 +1261,10 @@ unmap: release: pci_release_region(pdev, bar); +cleanup: + if (slot->data && slot->data->cleanup) + slot->data->cleanup(slot->data); + free: sdhci_free_host(host); @@ -1283,6 +1276,8 @@ static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) int dead; u32 scratch; + sdhci_pci_remove_own_cd(slot); + dead = 0; scratch = readl(slot->host->ioaddr + SDHCI_INT_STATUS); if (scratch == (u32)-1) @@ -1290,9 +1285,15 @@ static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) sdhci_remove_host(slot->host, dead); + if (gpio_is_valid(slot->rst_n_gpio)) + gpio_free(slot->rst_n_gpio); + if (slot->chip->fixes && slot->chip->fixes->remove_slot) slot->chip->fixes->remove_slot(slot, dead); + if (slot->data && slot->data->cleanup) + slot->data->cleanup(slot->data); + pci_release_region(slot->chip->pdev, slot->pci_bar); sdhci_free_host(slot->host); @@ -1379,7 +1380,7 @@ static int __devinit sdhci_pci_probe(struct pci_dev *pdev, slots = chip->num_slots; /* Quirk may have changed this */ for (i = 0; i < slots; i++) { - slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i); + slot = sdhci_pci_probe_slot(pdev, chip, first_bar, i); if (IS_ERR(slot)) { for (i--; i >= 0; i--) sdhci_pci_remove_slot(chip->slots[i]); diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c index 7a039c3cb1f..dbb75bfbcff 100644 --- a/drivers/mmc/host/sdhci-pxav2.c +++ b/drivers/mmc/host/sdhci-pxav2.c @@ -223,18 +223,8 @@ static struct platform_driver sdhci_pxav2_driver = { .probe = sdhci_pxav2_probe, .remove = __devexit_p(sdhci_pxav2_remove), }; -static int __init sdhci_pxav2_init(void) -{ - return platform_driver_register(&sdhci_pxav2_driver); -} - -static void __exit sdhci_pxav2_exit(void) -{ - platform_driver_unregister(&sdhci_pxav2_driver); -} -module_init(sdhci_pxav2_init); -module_exit(sdhci_pxav2_exit); +module_platform_driver(sdhci_pxav2_driver); MODULE_DESCRIPTION("SDHCI driver for pxav2"); MODULE_AUTHOR("Marvell International Ltd."); diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 15673a7ee6a..f2969568355 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -269,18 +269,8 @@ static struct platform_driver sdhci_pxav3_driver = { .probe = sdhci_pxav3_probe, .remove = __devexit_p(sdhci_pxav3_remove), }; -static int __init sdhci_pxav3_init(void) -{ - return platform_driver_register(&sdhci_pxav3_driver); -} - -static void __exit sdhci_pxav3_exit(void) -{ - platform_driver_unregister(&sdhci_pxav3_driver); -} -module_init(sdhci_pxav3_init); -module_exit(sdhci_pxav3_exit); +module_platform_driver(sdhci_pxav3_driver); MODULE_DESCRIPTION("SDHCI driver for pxav3"); MODULE_AUTHOR("Marvell International Ltd."); diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 9a20d1f55bb..1af756ee0f9 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -80,7 +80,7 @@ static void sdhci_s3c_check_sclk(struct sdhci_host *host) tmp &= ~S3C_SDHCI_CTRL2_SELBASECLK_MASK; tmp |= ourhost->cur_clk << S3C_SDHCI_CTRL2_SELBASECLK_SHIFT; - writel(tmp, host->ioaddr + 0x80); + writel(tmp, host->ioaddr + S3C_SDHCI_CONTROL2); } } @@ -521,6 +521,9 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev) if (pdata->host_caps) host->mmc->caps |= pdata->host_caps; + if (pdata->pm_caps) + host->mmc->pm_caps |= pdata->pm_caps; + host->quirks |= (SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE); @@ -654,18 +657,7 @@ static struct platform_driver sdhci_s3c_driver = { }, }; -static int __init sdhci_s3c_init(void) -{ - return platform_driver_register(&sdhci_s3c_driver); -} - -static void __exit sdhci_s3c_exit(void) -{ - platform_driver_unregister(&sdhci_s3c_driver); -} - -module_init(sdhci_s3c_init); -module_exit(sdhci_s3c_exit); +module_platform_driver(sdhci_s3c_driver); MODULE_DESCRIPTION("Samsung SDHCI (HSMMC) glue"); MODULE_AUTHOR("Ben Dooks, <ben@simtec.co.uk>"); diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index 63cc8b6a1c9..b7f8b33c5f1 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -21,6 +21,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/slab.h> #include <linux/mmc/host.h> #include <linux/mmc/sdhci-spear.h> @@ -271,26 +272,54 @@ static int __devexit sdhci_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int sdhci_suspend(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct spear_sdhci *sdhci = dev_get_platdata(dev); + int ret; + + ret = sdhci_suspend_host(host); + if (!ret) + clk_disable(sdhci->clk); + + return ret; +} + +static int sdhci_resume(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct spear_sdhci *sdhci = dev_get_platdata(dev); + int ret; + + ret = clk_enable(sdhci->clk); + if (ret) { + dev_dbg(dev, "Resume: Error enabling clock\n"); + return ret; + } + + return sdhci_resume_host(host); +} + +const struct dev_pm_ops sdhci_pm_ops = { + .suspend = sdhci_suspend, + .resume = sdhci_resume, +}; +#endif + static struct platform_driver sdhci_driver = { .driver = { .name = "sdhci", .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &sdhci_pm_ops, +#endif }, .probe = sdhci_probe, .remove = __devexit_p(sdhci_remove), }; -static int __init sdhci_init(void) -{ - return platform_driver_register(&sdhci_driver); -} -module_init(sdhci_init); - -static void __exit sdhci_exit(void) -{ - platform_driver_unregister(&sdhci_driver); -} -module_exit(sdhci_exit); +module_platform_driver(sdhci_driver); MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver"); MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>"); diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index e2e18d3f949..78a36eba4df 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -324,17 +324,7 @@ static struct platform_driver sdhci_tegra_driver = { .remove = __devexit_p(sdhci_tegra_remove), }; -static int __init sdhci_tegra_init(void) -{ - return platform_driver_register(&sdhci_tegra_driver); -} -module_init(sdhci_tegra_init); - -static void __exit sdhci_tegra_exit(void) -{ - platform_driver_unregister(&sdhci_tegra_driver); -} -module_exit(sdhci_tegra_exit); +module_platform_driver(sdhci_tegra_driver); MODULE_DESCRIPTION("SDHCI driver for Tegra"); MODULE_AUTHOR(" Google, Inc."); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 19ed580f2ca..8d66706824a 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -49,7 +49,7 @@ static void sdhci_finish_data(struct sdhci_host *); static void sdhci_send_command(struct sdhci_host *, struct mmc_command *); static void sdhci_finish_command(struct sdhci_host *); -static int sdhci_execute_tuning(struct mmc_host *mmc); +static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode); static void sdhci_tuning_timer(unsigned long data); #ifdef CONFIG_PM_RUNTIME @@ -146,10 +146,8 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable) { u32 present, irqs; - if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) - return; - - if (host->quirks2 & SDHCI_QUIRK2_OWN_CARD_DETECTION) + if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) || + !mmc_card_is_removable(host->mmc)) return; present = sdhci_readl(host, SDHCI_PRESENT_STATE) & @@ -214,6 +212,11 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask) if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier); + + if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { + if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL)) + host->ops->enable_dma(host); + } } static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios); @@ -423,12 +426,12 @@ static void sdhci_transfer_pio(struct sdhci_host *host) static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags) { local_irq_save(*flags); - return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; + return kmap_atomic(sg_page(sg)) + sg->offset; } static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags) { - kunmap_atomic(buffer, KM_BIO_SRC_IRQ); + kunmap_atomic(buffer); local_irq_restore(*flags); } @@ -1016,7 +1019,8 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) flags |= SDHCI_CMD_INDEX; /* CMD19 is special in that the Data Present Select should be set */ - if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK)) + if (cmd->data || cmd->opcode == MMC_SEND_TUNING_BLOCK || + cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200) flags |= SDHCI_CMD_DATA; sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND); @@ -1066,12 +1070,15 @@ static void sdhci_finish_command(struct sdhci_host *host) static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { int div = 0; /* Initialized for compiler warning */ + int real_div = div, clk_mul = 1; u16 clk = 0; unsigned long timeout; - if (clock == host->clock) + if (clock && clock == host->clock) return; + host->mmc->actual_clock = 0; + if (host->ops->set_clock) { host->ops->set_clock(host, clock); if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) @@ -1109,6 +1116,8 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) * Control register. */ clk = SDHCI_PROG_CLOCK_MODE; + real_div = div; + clk_mul = host->clk_mul; div--; } } else { @@ -1122,6 +1131,7 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) break; } } + real_div = div; div >>= 1; } } else { @@ -1130,9 +1140,13 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) if ((host->max_clk / div) <= clock) break; } + real_div = div; div >>= 1; } + if (real_div) + host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div; + clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT; clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN) << SDHCI_DIVIDER_HI_SHIFT; @@ -1160,7 +1174,7 @@ out: host->clock = clock; } -static void sdhci_set_power(struct sdhci_host *host, unsigned short power) +static int sdhci_set_power(struct sdhci_host *host, unsigned short power) { u8 pwr = 0; @@ -1183,13 +1197,13 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) } if (host->pwr == pwr) - return; + return -1; host->pwr = pwr; if (pwr == 0) { sdhci_writeb(host, 0, SDHCI_POWER_CONTROL); - return; + return 0; } /* @@ -1216,6 +1230,8 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) */ if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER) mdelay(10); + + return power; } /*****************************************************************************\ @@ -1277,7 +1293,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) if ((host->flags & SDHCI_NEEDS_RETUNING) && !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) { spin_unlock_irqrestore(&host->lock, flags); - sdhci_execute_tuning(mmc); + sdhci_execute_tuning(mmc, mrq->cmd->opcode); spin_lock_irqsave(&host->lock, flags); /* Restore original mmc_request structure */ @@ -1297,12 +1313,17 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) { unsigned long flags; + int vdd_bit = -1; u8 ctrl; spin_lock_irqsave(&host->lock, flags); - if (host->flags & SDHCI_DEVICE_DEAD) - goto out; + if (host->flags & SDHCI_DEVICE_DEAD) { + spin_unlock_irqrestore(&host->lock, flags); + if (host->vmmc && ios->power_mode == MMC_POWER_OFF) + mmc_regulator_set_ocr(host->mmc, host->vmmc, 0); + return; + } /* * Reset the chip on each power off. @@ -1316,9 +1337,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_set_clock(host, ios->clock); if (ios->power_mode == MMC_POWER_OFF) - sdhci_set_power(host, -1); + vdd_bit = sdhci_set_power(host, -1); else - sdhci_set_power(host, ios->vdd); + vdd_bit = sdhci_set_power(host, ios->vdd); + + if (host->vmmc && vdd_bit != -1) { + spin_unlock_irqrestore(&host->lock, flags); + mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd_bit); + spin_lock_irqsave(&host->lock, flags); + } if (host->ops->platform_send_init_74_clocks) host->ops->platform_send_init_74_clocks(host, ios->power_mode); @@ -1361,11 +1388,11 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) unsigned int clock; /* In case of UHS-I modes, set High Speed Enable */ - if ((ios->timing == MMC_TIMING_UHS_SDR50) || + if ((ios->timing == MMC_TIMING_MMC_HS200) || + (ios->timing == MMC_TIMING_UHS_SDR50) || (ios->timing == MMC_TIMING_UHS_SDR104) || (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR12)) + (ios->timing == MMC_TIMING_UHS_SDR25)) ctrl |= SDHCI_CTRL_HISPD; ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); @@ -1415,7 +1442,9 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); /* Select Bus Speed Mode for host */ ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; - if (ios->timing == MMC_TIMING_UHS_SDR12) + if (ios->timing == MMC_TIMING_MMC_HS200) + ctrl_2 |= SDHCI_CTRL_HS_SDR200; + else if (ios->timing == MMC_TIMING_UHS_SDR12) ctrl_2 |= SDHCI_CTRL_UHS_SDR12; else if (ios->timing == MMC_TIMING_UHS_SDR25) ctrl_2 |= SDHCI_CTRL_UHS_SDR25; @@ -1443,7 +1472,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); -out: mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -1663,7 +1691,7 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, return err; } -static int sdhci_execute_tuning(struct mmc_host *mmc) +static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host; u16 ctrl; @@ -1671,6 +1699,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) int tuning_loop_counter = MAX_TUNING_LOOP; unsigned long timeout; int err = 0; + bool requires_tuning_nonuhs = false; host = mmc_priv(mmc); @@ -1681,13 +1710,19 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); /* - * Host Controller needs tuning only in case of SDR104 mode - * and for SDR50 mode when Use Tuning for SDR50 is set in + * The Host Controller needs tuning only in case of SDR104 mode + * and for SDR50 mode when Use Tuning for SDR50 is set in the * Capabilities register. + * If the Host Controller supports the HS200 mode then the + * tuning function has to be executed. */ + if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) && + (host->flags & SDHCI_SDR50_NEEDS_TUNING || + host->flags & SDHCI_HS200_NEEDS_TUNING)) + requires_tuning_nonuhs = true; + if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) || - (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) && - (host->flags & SDHCI_SDR50_NEEDS_TUNING))) + requires_tuning_nonuhs) ctrl |= SDHCI_CTRL_EXEC_TUNING; else { spin_unlock(&host->lock); @@ -1723,7 +1758,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) if (!tuning_loop_counter && !timeout) break; - cmd.opcode = MMC_SEND_TUNING_BLOCK; + cmd.opcode = opcode; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.retries = 0; @@ -1738,7 +1773,17 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) * block to the Host Controller. So we set the block size * to 64 here. */ - sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE); + if (cmd.opcode == MMC_SEND_TUNING_BLOCK_HS200) { + if (mmc->ios.bus_width == MMC_BUS_WIDTH_8) + sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 128), + SDHCI_BLOCK_SIZE); + else if (mmc->ios.bus_width == MMC_BUS_WIDTH_4) + sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), + SDHCI_BLOCK_SIZE); + } else { + sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), + SDHCI_BLOCK_SIZE); + } /* * The tuning block is sent by the card to the host controller. @@ -2121,12 +2166,14 @@ static void sdhci_show_adma_error(struct sdhci_host *host) { } static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) { + u32 command; BUG_ON(intmask == 0); /* CMD19 generates _only_ Buffer Read Ready interrupt */ if (intmask & SDHCI_INT_DATA_AVAIL) { - if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) == - MMC_SEND_TUNING_BLOCK) { + command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)); + if (command == MMC_SEND_TUNING_BLOCK || + command == MMC_SEND_TUNING_BLOCK_HS200) { host->tuning_done = 1; wake_up(&host->buf_ready_int); return; @@ -2330,26 +2377,33 @@ out: int sdhci_suspend_host(struct sdhci_host *host) { int ret; + bool has_tuning_timer; sdhci_disable_card_detection(host); /* Disable tuning since we are suspending */ - if (host->version >= SDHCI_SPEC_300 && host->tuning_count && - host->tuning_mode == SDHCI_TUNING_MODE_1) { + has_tuning_timer = host->version >= SDHCI_SPEC_300 && + host->tuning_count && host->tuning_mode == SDHCI_TUNING_MODE_1; + if (has_tuning_timer) { + del_timer_sync(&host->tuning_timer); host->flags &= ~SDHCI_NEEDS_RETUNING; - mod_timer(&host->tuning_timer, jiffies + - host->tuning_count * HZ); } ret = mmc_suspend_host(host->mmc); - if (ret) + if (ret) { + if (has_tuning_timer) { + host->flags |= SDHCI_NEEDS_RETUNING; + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + } + + sdhci_enable_card_detection(host); + return ret; + } free_irq(host->irq, host); - if (host->vmmc) - ret = regulator_disable(host->vmmc); - return ret; } @@ -2359,12 +2413,6 @@ int sdhci_resume_host(struct sdhci_host *host) { int ret; - if (host->vmmc) { - int ret = regulator_enable(host->vmmc); - if (ret) - return ret; - } - if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { if (host->ops->enable_dma) host->ops->enable_dma(host); @@ -2727,10 +2775,14 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_SUPPORT_DDR50) mmc->caps |= MMC_CAP_UHS_DDR50; - /* Does the host needs tuning for SDR50? */ + /* Does the host need tuning for SDR50? */ if (caps[1] & SDHCI_USE_SDR50_TUNING) host->flags |= SDHCI_SDR50_NEEDS_TUNING; + /* Does the host need tuning for HS200? */ + if (mmc->caps2 & MMC_CAP2_HS200) + host->flags |= SDHCI_HS200_NEEDS_TUNING; + /* Driver Type(s) (A, C, D) supported by the host */ if (caps[1] & SDHCI_DRIVER_TYPE_A) mmc->caps |= MMC_CAP_DRIVER_TYPE_A; @@ -2926,8 +2978,6 @@ int sdhci_add_host(struct sdhci_host *host) if (IS_ERR(host->vmmc)) { pr_info("%s: no vmmc regulator found\n", mmc_hostname(mmc)); host->vmmc = NULL; - } else { - regulator_enable(host->vmmc); } sdhci_init(host, 0); @@ -3016,10 +3066,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); - if (host->vmmc) { - regulator_disable(host->vmmc); + if (host->vmmc) regulator_put(host->vmmc); - } kfree(host->adma_desc); kfree(host->align_buffer); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index a04d4d0c6fd..ad265b96b75 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -158,6 +158,7 @@ #define SDHCI_CTRL_UHS_SDR50 0x0002 #define SDHCI_CTRL_UHS_SDR104 0x0003 #define SDHCI_CTRL_UHS_DDR50 0x0004 +#define SDHCI_CTRL_HS_SDR200 0x0005 /* reserved value in SDIO spec */ #define SDHCI_CTRL_VDD_180 0x0008 #define SDHCI_CTRL_DRV_TYPE_MASK 0x0030 #define SDHCI_CTRL_DRV_TYPE_B 0x0000 diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index d5505f3fe2a..4a2c5b2355f 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -16,6 +16,33 @@ * */ +/* + * The MMCIF driver is now processing MMC requests asynchronously, according + * to the Linux MMC API requirement. + * + * The MMCIF driver processes MMC requests in up to 3 stages: command, optional + * data, and optional stop. To achieve asynchronous processing each of these + * stages is split into two halves: a top and a bottom half. The top half + * initialises the hardware, installs a timeout handler to handle completion + * timeouts, and returns. In case of the command stage this immediately returns + * control to the caller, leaving all further processing to run asynchronously. + * All further request processing is performed by the bottom halves. + * + * The bottom half further consists of a "hard" IRQ handler, an IRQ handler + * thread, a DMA completion callback, if DMA is used, a timeout work, and + * request- and stage-specific handler methods. + * + * Each bottom half run begins with either a hardware interrupt, a DMA callback + * invocation, or a timeout work run. In case of an error or a successful + * processing completion, the MMC core is informed and the request processing is + * finished. In case processing has to continue, i.e., if data has to be read + * from or written to the card, or if a stop command has to be sent, the next + * top half is called, which performs the necessary hardware handling and + * reschedules the timeout work. This returns the driver state machine into the + * bottom half waiting state. + */ + +#include <linux/bitops.h> #include <linux/clk.h> #include <linux/completion.h> #include <linux/delay.h> @@ -123,6 +150,11 @@ #define MASK_MRBSYTO (1 << 1) #define MASK_MRSPTO (1 << 0) +#define MASK_START_CMD (MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR | \ + MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR | \ + MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO | \ + MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO) + /* CE_HOST_STS1 */ #define STS1_CMDSEQ (1 << 31) @@ -162,9 +194,21 @@ enum mmcif_state { STATE_IOS, }; +enum mmcif_wait_for { + MMCIF_WAIT_FOR_REQUEST, + MMCIF_WAIT_FOR_CMD, + MMCIF_WAIT_FOR_MREAD, + MMCIF_WAIT_FOR_MWRITE, + MMCIF_WAIT_FOR_READ, + MMCIF_WAIT_FOR_WRITE, + MMCIF_WAIT_FOR_READ_END, + MMCIF_WAIT_FOR_WRITE_END, + MMCIF_WAIT_FOR_STOP, +}; + struct sh_mmcif_host { struct mmc_host *mmc; - struct mmc_data *data; + struct mmc_request *mrq; struct platform_device *pd; struct sh_dmae_slave dma_slave_tx; struct sh_dmae_slave dma_slave_rx; @@ -172,11 +216,17 @@ struct sh_mmcif_host { unsigned int clk; int bus_width; bool sd_error; + bool dying; long timeout; void __iomem *addr; - struct completion intr_wait; + u32 *pio_ptr; + spinlock_t lock; /* protect sh_mmcif_host::state */ enum mmcif_state state; - spinlock_t lock; + enum mmcif_wait_for wait_for; + struct delayed_work timeout_work; + size_t blocksize; + int sg_idx; + int sg_blkidx; bool power; bool card_present; @@ -202,19 +252,21 @@ static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host, static void mmcif_dma_complete(void *arg) { struct sh_mmcif_host *host = arg; + struct mmc_data *data = host->mrq->data; + dev_dbg(&host->pd->dev, "Command completed\n"); - if (WARN(!host->data, "%s: NULL data in DMA completion!\n", + if (WARN(!data, "%s: NULL data in DMA completion!\n", dev_name(&host->pd->dev))) return; - if (host->data->flags & MMC_DATA_READ) + if (data->flags & MMC_DATA_READ) dma_unmap_sg(host->chan_rx->device->dev, - host->data->sg, host->data->sg_len, + data->sg, data->sg_len, DMA_FROM_DEVICE); else dma_unmap_sg(host->chan_tx->device->dev, - host->data->sg, host->data->sg_len, + data->sg, data->sg_len, DMA_TO_DEVICE); complete(&host->dma_complete); @@ -222,13 +274,14 @@ static void mmcif_dma_complete(void *arg) static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) { - struct scatterlist *sg = host->data->sg; + struct mmc_data *data = host->mrq->data; + struct scatterlist *sg = data->sg; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *chan = host->chan_rx; dma_cookie_t cookie = -EINVAL; int ret; - ret = dma_map_sg(chan->device->dev, sg, host->data->sg_len, + ret = dma_map_sg(chan->device->dev, sg, data->sg_len, DMA_FROM_DEVICE); if (ret > 0) { host->dma_active = true; @@ -244,7 +297,7 @@ static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) dma_async_issue_pending(chan); } dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", - __func__, host->data->sg_len, ret, cookie); + __func__, data->sg_len, ret, cookie); if (!desc) { /* DMA failed, fall back to PIO */ @@ -265,18 +318,19 @@ static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) } dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__, - desc, cookie, host->data->sg_len); + desc, cookie, data->sg_len); } static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) { - struct scatterlist *sg = host->data->sg; + struct mmc_data *data = host->mrq->data; + struct scatterlist *sg = data->sg; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *chan = host->chan_tx; dma_cookie_t cookie = -EINVAL; int ret; - ret = dma_map_sg(chan->device->dev, sg, host->data->sg_len, + ret = dma_map_sg(chan->device->dev, sg, data->sg_len, DMA_TO_DEVICE); if (ret > 0) { host->dma_active = true; @@ -292,7 +346,7 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) dma_async_issue_pending(chan); } dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", - __func__, host->data->sg_len, ret, cookie); + __func__, data->sg_len, ret, cookie); if (!desc) { /* DMA failed, fall back to PIO */ @@ -399,7 +453,7 @@ static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk) sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_SUP_PCLK); else sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_CLEAR & - (ilog2(__rounddown_pow_of_two(host->clk / clk)) << 16)); + ((fls(host->clk / clk) - 1) << 16)); sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_ENABLE); } @@ -421,7 +475,7 @@ static void sh_mmcif_sync_reset(struct sh_mmcif_host *host) static int sh_mmcif_error_manage(struct sh_mmcif_host *host) { u32 state1, state2; - int ret, timeout = 10000000; + int ret, timeout; host->sd_error = false; @@ -433,155 +487,212 @@ static int sh_mmcif_error_manage(struct sh_mmcif_host *host) if (state1 & STS1_CMDSEQ) { sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, CMD_CTRL_BREAK); sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, ~CMD_CTRL_BREAK); - while (1) { - timeout--; - if (timeout < 0) { - dev_err(&host->pd->dev, - "Forceed end of command sequence timeout err\n"); - return -EIO; - } + for (timeout = 10000000; timeout; timeout--) { if (!(sh_mmcif_readl(host->addr, MMCIF_CE_HOST_STS1) - & STS1_CMDSEQ)) + & STS1_CMDSEQ)) break; mdelay(1); } + if (!timeout) { + dev_err(&host->pd->dev, + "Forced end of command sequence timeout err\n"); + return -EIO; + } sh_mmcif_sync_reset(host); dev_dbg(&host->pd->dev, "Forced end of command sequence\n"); return -EIO; } if (state2 & STS2_CRC_ERR) { - dev_dbg(&host->pd->dev, ": Happened CRC error\n"); + dev_dbg(&host->pd->dev, ": CRC error\n"); ret = -EIO; } else if (state2 & STS2_TIMEOUT_ERR) { - dev_dbg(&host->pd->dev, ": Happened Timeout error\n"); + dev_dbg(&host->pd->dev, ": Timeout\n"); ret = -ETIMEDOUT; } else { - dev_dbg(&host->pd->dev, ": Happened End/Index error\n"); + dev_dbg(&host->pd->dev, ": End/Index error\n"); ret = -EIO; } return ret; } -static int sh_mmcif_single_read(struct sh_mmcif_host *host, - struct mmc_request *mrq) +static bool sh_mmcif_next_block(struct sh_mmcif_host *host, u32 *p) { - struct mmc_data *data = mrq->data; - long time; - u32 blocksize, i, *p = sg_virt(data->sg); + struct mmc_data *data = host->mrq->data; + + host->sg_blkidx += host->blocksize; + + /* data->sg->length must be a multiple of host->blocksize? */ + BUG_ON(host->sg_blkidx > data->sg->length); + + if (host->sg_blkidx == data->sg->length) { + host->sg_blkidx = 0; + if (++host->sg_idx < data->sg_len) + host->pio_ptr = sg_virt(++data->sg); + } else { + host->pio_ptr = p; + } + + if (host->sg_idx == data->sg_len) + return false; + + return true; +} + +static void sh_mmcif_single_read(struct sh_mmcif_host *host, + struct mmc_request *mrq) +{ + host->blocksize = (sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) & + BLOCK_SIZE_MASK) + 3; + + host->wait_for = MMCIF_WAIT_FOR_READ; + schedule_delayed_work(&host->timeout_work, host->timeout); /* buf read enable */ sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN); - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); - - blocksize = (BLOCK_SIZE_MASK & - sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3; - for (i = 0; i < blocksize / 4; i++) +} + +static bool sh_mmcif_read_block(struct sh_mmcif_host *host) +{ + struct mmc_data *data = host->mrq->data; + u32 *p = sg_virt(data->sg); + int i; + + if (host->sd_error) { + data->error = sh_mmcif_error_manage(host); + return false; + } + + for (i = 0; i < host->blocksize / 4; i++) *p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA); /* buffer read end */ sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFRE); - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); + host->wait_for = MMCIF_WAIT_FOR_READ_END; - return 0; + return true; } -static int sh_mmcif_multi_read(struct sh_mmcif_host *host, - struct mmc_request *mrq) +static void sh_mmcif_multi_read(struct sh_mmcif_host *host, + struct mmc_request *mrq) { struct mmc_data *data = mrq->data; - long time; - u32 blocksize, i, j, sec, *p; - - blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host->addr, - MMCIF_CE_BLOCK_SET); - for (j = 0; j < data->sg_len; j++) { - p = sg_virt(data->sg); - for (sec = 0; sec < data->sg->length / blocksize; sec++) { - sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN); - /* buf read enable */ - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); - - for (i = 0; i < blocksize / 4; i++) - *p++ = sh_mmcif_readl(host->addr, - MMCIF_CE_DATA); - } - if (j < data->sg_len - 1) - data->sg++; + + if (!data->sg_len || !data->sg->length) + return; + + host->blocksize = sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) & + BLOCK_SIZE_MASK; + + host->wait_for = MMCIF_WAIT_FOR_MREAD; + host->sg_idx = 0; + host->sg_blkidx = 0; + host->pio_ptr = sg_virt(data->sg); + schedule_delayed_work(&host->timeout_work, host->timeout); + sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN); +} + +static bool sh_mmcif_mread_block(struct sh_mmcif_host *host) +{ + struct mmc_data *data = host->mrq->data; + u32 *p = host->pio_ptr; + int i; + + if (host->sd_error) { + data->error = sh_mmcif_error_manage(host); + return false; } - return 0; + + BUG_ON(!data->sg->length); + + for (i = 0; i < host->blocksize / 4; i++) + *p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA); + + if (!sh_mmcif_next_block(host, p)) + return false; + + schedule_delayed_work(&host->timeout_work, host->timeout); + sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN); + + return true; } -static int sh_mmcif_single_write(struct sh_mmcif_host *host, +static void sh_mmcif_single_write(struct sh_mmcif_host *host, struct mmc_request *mrq) { - struct mmc_data *data = mrq->data; - long time; - u32 blocksize, i, *p = sg_virt(data->sg); + host->blocksize = (sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) & + BLOCK_SIZE_MASK) + 3; - sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN); + host->wait_for = MMCIF_WAIT_FOR_WRITE; + schedule_delayed_work(&host->timeout_work, host->timeout); /* buf write enable */ - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); - - blocksize = (BLOCK_SIZE_MASK & - sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3; - for (i = 0; i < blocksize / 4; i++) + sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN); +} + +static bool sh_mmcif_write_block(struct sh_mmcif_host *host) +{ + struct mmc_data *data = host->mrq->data; + u32 *p = sg_virt(data->sg); + int i; + + if (host->sd_error) { + data->error = sh_mmcif_error_manage(host); + return false; + } + + for (i = 0; i < host->blocksize / 4; i++) sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++); /* buffer write end */ sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MDTRANE); + host->wait_for = MMCIF_WAIT_FOR_WRITE_END; - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); - - return 0; + return true; } -static int sh_mmcif_multi_write(struct sh_mmcif_host *host, - struct mmc_request *mrq) +static void sh_mmcif_multi_write(struct sh_mmcif_host *host, + struct mmc_request *mrq) { struct mmc_data *data = mrq->data; - long time; - u32 i, sec, j, blocksize, *p; - blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host->addr, - MMCIF_CE_BLOCK_SET); + if (!data->sg_len || !data->sg->length) + return; - for (j = 0; j < data->sg_len; j++) { - p = sg_virt(data->sg); - for (sec = 0; sec < data->sg->length / blocksize; sec++) { - sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN); - /* buf write enable*/ - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); + host->blocksize = sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) & + BLOCK_SIZE_MASK; - if (time <= 0 || host->sd_error) - return sh_mmcif_error_manage(host); + host->wait_for = MMCIF_WAIT_FOR_MWRITE; + host->sg_idx = 0; + host->sg_blkidx = 0; + host->pio_ptr = sg_virt(data->sg); + schedule_delayed_work(&host->timeout_work, host->timeout); + sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN); +} - for (i = 0; i < blocksize / 4; i++) - sh_mmcif_writel(host->addr, - MMCIF_CE_DATA, *p++); - } - if (j < data->sg_len - 1) - data->sg++; +static bool sh_mmcif_mwrite_block(struct sh_mmcif_host *host) +{ + struct mmc_data *data = host->mrq->data; + u32 *p = host->pio_ptr; + int i; + + if (host->sd_error) { + data->error = sh_mmcif_error_manage(host); + return false; } - return 0; + + BUG_ON(!data->sg->length); + + for (i = 0; i < host->blocksize / 4; i++) + sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++); + + if (!sh_mmcif_next_block(host, p)) + return false; + + schedule_delayed_work(&host->timeout_work, host->timeout); + sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN); + + return true; } static void sh_mmcif_get_response(struct sh_mmcif_host *host, @@ -603,8 +714,11 @@ static void sh_mmcif_get_cmd12response(struct sh_mmcif_host *host, } static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host, - struct mmc_request *mrq, struct mmc_command *cmd, u32 opc) + struct mmc_request *mrq) { + struct mmc_data *data = mrq->data; + struct mmc_command *cmd = mrq->cmd; + u32 opc = cmd->opcode; u32 tmp = 0; /* Response Type check */ @@ -636,7 +750,7 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host, break; } /* WDAT / DATW */ - if (host->data) { + if (data) { tmp |= CMD_SET_WDAT; switch (host->bus_width) { case MMC_BUS_WIDTH_1: @@ -660,7 +774,7 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host, if (opc == MMC_READ_MULTIPLE_BLOCK || opc == MMC_WRITE_MULTIPLE_BLOCK) { tmp |= CMD_SET_CMLTE | CMD_SET_CMD12EN; sh_mmcif_bitset(host, MMCIF_CE_BLOCK_SET, - mrq->data->blocks << 16); + data->blocks << 16); } /* RIDXC[1:0] check bits */ if (opc == MMC_SEND_OP_COND || opc == MMC_ALL_SEND_CID || @@ -674,68 +788,60 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host, opc == MMC_SEND_CSD || opc == MMC_SEND_CID) tmp |= CMD_SET_CRC7C_INTERNAL; - return opc = ((opc << 24) | tmp); + return (opc << 24) | tmp; } static int sh_mmcif_data_trans(struct sh_mmcif_host *host, - struct mmc_request *mrq, u32 opc) + struct mmc_request *mrq, u32 opc) { - int ret; - switch (opc) { case MMC_READ_MULTIPLE_BLOCK: - ret = sh_mmcif_multi_read(host, mrq); - break; + sh_mmcif_multi_read(host, mrq); + return 0; case MMC_WRITE_MULTIPLE_BLOCK: - ret = sh_mmcif_multi_write(host, mrq); - break; + sh_mmcif_multi_write(host, mrq); + return 0; case MMC_WRITE_BLOCK: - ret = sh_mmcif_single_write(host, mrq); - break; + sh_mmcif_single_write(host, mrq); + return 0; case MMC_READ_SINGLE_BLOCK: case MMC_SEND_EXT_CSD: - ret = sh_mmcif_single_read(host, mrq); - break; + sh_mmcif_single_read(host, mrq); + return 0; default: dev_err(&host->pd->dev, "UNSUPPORTED CMD = d'%08d\n", opc); - ret = -EINVAL; - break; + return -EINVAL; } - return ret; } static void sh_mmcif_start_cmd(struct sh_mmcif_host *host, - struct mmc_request *mrq, struct mmc_command *cmd) + struct mmc_request *mrq) { - long time; - int ret = 0, mask = 0; + struct mmc_command *cmd = mrq->cmd; u32 opc = cmd->opcode; + u32 mask; switch (opc) { - /* respons busy check */ + /* response busy check */ case MMC_SWITCH: case MMC_STOP_TRANSMISSION: case MMC_SET_WRITE_PROT: case MMC_CLR_WRITE_PROT: case MMC_ERASE: case MMC_GEN_CMD: - mask = MASK_MRBSYE; + mask = MASK_START_CMD | MASK_MRBSYE; break; default: - mask = MASK_MCRSPE; + mask = MASK_START_CMD | MASK_MCRSPE; break; } - mask |= MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR | - MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR | - MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO | - MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO; - if (host->data) { + if (mrq->data) { sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET, 0); sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET, mrq->data->blksz); } - opc = sh_mmcif_set_cmd(host, mrq, cmd, opc); + opc = sh_mmcif_set_cmd(host, mrq); sh_mmcif_writel(host->addr, MMCIF_CE_INT, 0xD80430C0); sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, mask); @@ -744,80 +850,28 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host, /* set cmd */ sh_mmcif_writel(host->addr, MMCIF_CE_CMD_SET, opc); - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0) { - cmd->error = sh_mmcif_error_manage(host); - return; - } - if (host->sd_error) { - switch (cmd->opcode) { - case MMC_ALL_SEND_CID: - case MMC_SELECT_CARD: - case MMC_APP_CMD: - cmd->error = -ETIMEDOUT; - break; - default: - dev_dbg(&host->pd->dev, "Cmd(d'%d) err\n", - cmd->opcode); - cmd->error = sh_mmcif_error_manage(host); - break; - } - host->sd_error = false; - return; - } - if (!(cmd->flags & MMC_RSP_PRESENT)) { - cmd->error = 0; - return; - } - sh_mmcif_get_response(host, cmd); - if (host->data) { - if (!host->dma_active) { - ret = sh_mmcif_data_trans(host, mrq, cmd->opcode); - } else { - long time = - wait_for_completion_interruptible_timeout(&host->dma_complete, - host->timeout); - if (!time) - ret = -ETIMEDOUT; - else if (time < 0) - ret = time; - sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, - BUF_ACC_DMAREN | BUF_ACC_DMAWEN); - host->dma_active = false; - } - if (ret < 0) - mrq->data->bytes_xfered = 0; - else - mrq->data->bytes_xfered = - mrq->data->blocks * mrq->data->blksz; - } - cmd->error = ret; + host->wait_for = MMCIF_WAIT_FOR_CMD; + schedule_delayed_work(&host->timeout_work, host->timeout); } static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host, - struct mmc_request *mrq, struct mmc_command *cmd) + struct mmc_request *mrq) { - long time; - - if (mrq->cmd->opcode == MMC_READ_MULTIPLE_BLOCK) + switch (mrq->cmd->opcode) { + case MMC_READ_MULTIPLE_BLOCK: sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12DRE); - else if (mrq->cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK) + break; + case MMC_WRITE_MULTIPLE_BLOCK: sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE); - else { + break; + default: dev_err(&host->pd->dev, "unsupported stop cmd\n"); - cmd->error = sh_mmcif_error_manage(host); + mrq->stop->error = sh_mmcif_error_manage(host); return; } - time = wait_for_completion_interruptible_timeout(&host->intr_wait, - host->timeout); - if (time <= 0 || host->sd_error) { - cmd->error = sh_mmcif_error_manage(host); - return; - } - sh_mmcif_get_cmd12response(host, cmd); - cmd->error = 0; + host->wait_for = MMCIF_WAIT_FOR_STOP; + schedule_delayed_work(&host->timeout_work, host->timeout); } static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq) @@ -856,23 +910,10 @@ static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq) default: break; } - host->data = mrq->data; - if (mrq->data) { - if (mrq->data->flags & MMC_DATA_READ) { - if (host->chan_rx) - sh_mmcif_start_dma_rx(host); - } else { - if (host->chan_tx) - sh_mmcif_start_dma_tx(host); - } - } - sh_mmcif_start_cmd(host, mrq, mrq->cmd); - host->data = NULL; - if (!mrq->cmd->error && mrq->stop) - sh_mmcif_stop_cmd(host, mrq, mrq->stop); - host->state = STATE_IDLE; - mmc_request_done(mmc, mrq); + host->mrq = mrq; + + sh_mmcif_start_cmd(host, mrq); } static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) @@ -947,9 +988,156 @@ static struct mmc_host_ops sh_mmcif_ops = { .get_cd = sh_mmcif_get_cd, }; -static void sh_mmcif_detect(struct mmc_host *mmc) +static bool sh_mmcif_end_cmd(struct sh_mmcif_host *host) { - mmc_detect_change(mmc, 0); + struct mmc_command *cmd = host->mrq->cmd; + struct mmc_data *data = host->mrq->data; + long time; + + if (host->sd_error) { + switch (cmd->opcode) { + case MMC_ALL_SEND_CID: + case MMC_SELECT_CARD: + case MMC_APP_CMD: + cmd->error = -ETIMEDOUT; + host->sd_error = false; + break; + default: + cmd->error = sh_mmcif_error_manage(host); + dev_dbg(&host->pd->dev, "Cmd(d'%d) error %d\n", + cmd->opcode, cmd->error); + break; + } + return false; + } + if (!(cmd->flags & MMC_RSP_PRESENT)) { + cmd->error = 0; + return false; + } + + sh_mmcif_get_response(host, cmd); + + if (!data) + return false; + + if (data->flags & MMC_DATA_READ) { + if (host->chan_rx) + sh_mmcif_start_dma_rx(host); + } else { + if (host->chan_tx) + sh_mmcif_start_dma_tx(host); + } + + if (!host->dma_active) { + data->error = sh_mmcif_data_trans(host, host->mrq, cmd->opcode); + if (!data->error) + return true; + return false; + } + + /* Running in the IRQ thread, can sleep */ + time = wait_for_completion_interruptible_timeout(&host->dma_complete, + host->timeout); + if (host->sd_error) { + dev_err(host->mmc->parent, + "Error IRQ while waiting for DMA completion!\n"); + /* Woken up by an error IRQ: abort DMA */ + if (data->flags & MMC_DATA_READ) + dmaengine_terminate_all(host->chan_rx); + else + dmaengine_terminate_all(host->chan_tx); + data->error = sh_mmcif_error_manage(host); + } else if (!time) { + data->error = -ETIMEDOUT; + } else if (time < 0) { + data->error = time; + } + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, + BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + host->dma_active = false; + + if (data->error) + data->bytes_xfered = 0; + + return false; +} + +static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id) +{ + struct sh_mmcif_host *host = dev_id; + struct mmc_request *mrq = host->mrq; + struct mmc_data *data = mrq->data; + + cancel_delayed_work_sync(&host->timeout_work); + + /* + * All handlers return true, if processing continues, and false, if the + * request has to be completed - successfully or not + */ + switch (host->wait_for) { + case MMCIF_WAIT_FOR_REQUEST: + /* We're too late, the timeout has already kicked in */ + return IRQ_HANDLED; + case MMCIF_WAIT_FOR_CMD: + if (sh_mmcif_end_cmd(host)) + /* Wait for data */ + return IRQ_HANDLED; + break; + case MMCIF_WAIT_FOR_MREAD: + if (sh_mmcif_mread_block(host)) + /* Wait for more data */ + return IRQ_HANDLED; + break; + case MMCIF_WAIT_FOR_READ: + if (sh_mmcif_read_block(host)) + /* Wait for data end */ + return IRQ_HANDLED; + break; + case MMCIF_WAIT_FOR_MWRITE: + if (sh_mmcif_mwrite_block(host)) + /* Wait data to write */ + return IRQ_HANDLED; + break; + case MMCIF_WAIT_FOR_WRITE: + if (sh_mmcif_write_block(host)) + /* Wait for data end */ + return IRQ_HANDLED; + break; + case MMCIF_WAIT_FOR_STOP: + if (host->sd_error) { + mrq->stop->error = sh_mmcif_error_manage(host); + break; + } + sh_mmcif_get_cmd12response(host, mrq->stop); + mrq->stop->error = 0; + break; + case MMCIF_WAIT_FOR_READ_END: + case MMCIF_WAIT_FOR_WRITE_END: + if (host->sd_error) + data->error = sh_mmcif_error_manage(host); + break; + default: + BUG(); + } + + if (host->wait_for != MMCIF_WAIT_FOR_STOP) { + if (!mrq->cmd->error && data && !data->error) + data->bytes_xfered = + data->blocks * data->blksz; + + if (mrq->stop && !mrq->cmd->error && (!data || !data->error)) { + sh_mmcif_stop_cmd(host, mrq); + if (!mrq->stop->error) + return IRQ_HANDLED; + } + } + + host->wait_for = MMCIF_WAIT_FOR_REQUEST; + host->state = STATE_IDLE; + host->mrq = NULL; + mmc_request_done(host->mmc, mrq); + + return IRQ_HANDLED; } static irqreturn_t sh_mmcif_intr(int irq, void *dev_id) @@ -960,7 +1148,12 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id) state = sh_mmcif_readl(host->addr, MMCIF_CE_INT); - if (state & INT_RBSYE) { + if (state & INT_ERR_STS) { + /* error interrupts - process first */ + sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state); + sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state); + err = 1; + } else if (state & INT_RBSYE) { sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~(INT_RBSYE | INT_CRSPE)); sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MRBSYE); @@ -988,11 +1181,6 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id) sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~(INT_CMD12RBE | INT_CMD12CRE)); sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE); - } else if (state & INT_ERR_STS) { - /* err interrupts */ - sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state); - sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state); - err = 1; } else { dev_dbg(&host->pd->dev, "Unsupported interrupt: 0x%x\n", state); sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state); @@ -1003,14 +1191,57 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id) host->sd_error = true; dev_dbg(&host->pd->dev, "int err state = %08x\n", state); } - if (state & ~(INT_CMD12RBE | INT_CMD12CRE)) - complete(&host->intr_wait); - else + if (state & ~(INT_CMD12RBE | INT_CMD12CRE)) { + if (!host->dma_active) + return IRQ_WAKE_THREAD; + else if (host->sd_error) + mmcif_dma_complete(host); + } else { dev_dbg(&host->pd->dev, "Unexpected IRQ 0x%x\n", state); + } return IRQ_HANDLED; } +static void mmcif_timeout_work(struct work_struct *work) +{ + struct delayed_work *d = container_of(work, struct delayed_work, work); + struct sh_mmcif_host *host = container_of(d, struct sh_mmcif_host, timeout_work); + struct mmc_request *mrq = host->mrq; + + if (host->dying) + /* Don't run after mmc_remove_host() */ + return; + + /* + * Handle races with cancel_delayed_work(), unless + * cancel_delayed_work_sync() is used + */ + switch (host->wait_for) { + case MMCIF_WAIT_FOR_CMD: + mrq->cmd->error = sh_mmcif_error_manage(host); + break; + case MMCIF_WAIT_FOR_STOP: + mrq->stop->error = sh_mmcif_error_manage(host); + break; + case MMCIF_WAIT_FOR_MREAD: + case MMCIF_WAIT_FOR_MWRITE: + case MMCIF_WAIT_FOR_READ: + case MMCIF_WAIT_FOR_WRITE: + case MMCIF_WAIT_FOR_READ_END: + case MMCIF_WAIT_FOR_WRITE_END: + mrq->data->error = sh_mmcif_error_manage(host); + break; + default: + BUG(); + } + + host->state = STATE_IDLE; + host->wait_for = MMCIF_WAIT_FOR_REQUEST; + host->mrq = NULL; + mmc_request_done(host->mmc, mrq); +} + static int __devinit sh_mmcif_probe(struct platform_device *pdev) { int ret = 0, irq[2]; @@ -1064,7 +1295,6 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev) host->clk = clk_get_rate(host->hclk); host->pd = pdev; - init_completion(&host->intr_wait); spin_lock_init(&host->lock); mmc->ops = &sh_mmcif_ops; @@ -1101,19 +1331,21 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev) sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL); - ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host); + ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, "sh_mmc:error", host); if (ret) { dev_err(&pdev->dev, "request_irq error (sh_mmc:error)\n"); goto clean_up3; } - ret = request_irq(irq[1], sh_mmcif_intr, 0, "sh_mmc:int", host); + ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt, 0, "sh_mmc:int", host); if (ret) { free_irq(irq[0], host); dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n"); goto clean_up3; } - sh_mmcif_detect(host->mmc); + INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work); + + mmc_detect_change(host->mmc, 0); dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION); dev_dbg(&pdev->dev, "chip ver H'%04x\n", @@ -1139,11 +1371,19 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev) struct sh_mmcif_host *host = platform_get_drvdata(pdev); int irq[2]; + host->dying = true; pm_runtime_get_sync(&pdev->dev); mmc_remove_host(host->mmc); sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL); + /* + * FIXME: cancel_delayed_work(_sync)() and free_irq() race with the + * mmc_remove_host() call above. But swapping order doesn't help either + * (a query on the linux-mmc mailing list didn't bring any replies). + */ + cancel_delayed_work_sync(&host->timeout_work); + if (host->addr) iounmap(host->addr); @@ -1206,19 +1446,7 @@ static struct platform_driver sh_mmcif_driver = { }, }; -static int __init sh_mmcif_init(void) -{ - return platform_driver_register(&sh_mmcif_driver); -} - -static void __exit sh_mmcif_exit(void) -{ - platform_driver_unregister(&sh_mmcif_driver); -} - -module_init(sh_mmcif_init); -module_exit(sh_mmcif_exit); - +module_platform_driver(sh_mmcif_driver); MODULE_DESCRIPTION("SuperH on-chip MMC/eMMC interface driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 41ae6466bd8..58da3c44acc 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -282,18 +282,7 @@ static struct platform_driver sh_mobile_sdhi_driver = { .remove = __devexit_p(sh_mobile_sdhi_remove), }; -static int __init sh_mobile_sdhi_init(void) -{ - return platform_driver_register(&sh_mobile_sdhi_driver); -} - -static void __exit sh_mobile_sdhi_exit(void) -{ - platform_driver_unregister(&sh_mobile_sdhi_driver); -} - -module_init(sh_mobile_sdhi_init); -module_exit(sh_mobile_sdhi_exit); +module_platform_driver(sh_mobile_sdhi_driver); MODULE_DESCRIPTION("SuperH Mobile SDHI driver"); MODULE_AUTHOR("Magnus Damm"); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 69d249f51d6..43d962829f8 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -118,7 +118,7 @@ static void tifm_sd_read_fifo(struct tifm_sd *host, struct page *pg, unsigned char *buf; unsigned int pos = 0, val; - buf = kmap_atomic(pg, KM_BIO_DST_IRQ) + off; + buf = kmap_atomic(pg) + off; if (host->cmd_flags & DATA_CARRY) { buf[pos++] = host->bounce_buf_data[0]; host->cmd_flags &= ~DATA_CARRY; @@ -134,7 +134,7 @@ static void tifm_sd_read_fifo(struct tifm_sd *host, struct page *pg, } buf[pos++] = (val >> 8) & 0xff; } - kunmap_atomic(buf - off, KM_BIO_DST_IRQ); + kunmap_atomic(buf - off); } static void tifm_sd_write_fifo(struct tifm_sd *host, struct page *pg, @@ -144,7 +144,7 @@ static void tifm_sd_write_fifo(struct tifm_sd *host, struct page *pg, unsigned char *buf; unsigned int pos = 0, val; - buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + off; + buf = kmap_atomic(pg) + off; if (host->cmd_flags & DATA_CARRY) { val = host->bounce_buf_data[0] | ((buf[pos++] << 8) & 0xff00); writel(val, sock->addr + SOCK_MMCSD_DATA); @@ -161,7 +161,7 @@ static void tifm_sd_write_fifo(struct tifm_sd *host, struct page *pg, val |= (buf[pos++] << 8) & 0xff00; writel(val, sock->addr + SOCK_MMCSD_DATA); } - kunmap_atomic(buf - off, KM_BIO_SRC_IRQ); + kunmap_atomic(buf - off); } static void tifm_sd_transfer_data(struct tifm_sd *host) @@ -212,13 +212,13 @@ static void tifm_sd_copy_page(struct page *dst, unsigned int dst_off, struct page *src, unsigned int src_off, unsigned int count) { - unsigned char *src_buf = kmap_atomic(src, KM_BIO_SRC_IRQ) + src_off; - unsigned char *dst_buf = kmap_atomic(dst, KM_BIO_DST_IRQ) + dst_off; + unsigned char *src_buf = kmap_atomic(src) + src_off; + unsigned char *dst_buf = kmap_atomic(dst) + dst_off; memcpy(dst_buf, src_buf, count); - kunmap_atomic(dst_buf - dst_off, KM_BIO_DST_IRQ); - kunmap_atomic(src_buf - src_off, KM_BIO_SRC_IRQ); + kunmap_atomic(dst_buf - dst_off); + kunmap_atomic(src_buf - src_off); } static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data) diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index a4ea1024278..113ce6c9cf3 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -138,19 +138,7 @@ static struct platform_driver tmio_mmc_driver = { .resume = tmio_mmc_resume, }; - -static int __init tmio_mmc_init(void) -{ - return platform_driver_register(&tmio_mmc_driver); -} - -static void __exit tmio_mmc_exit(void) -{ - platform_driver_unregister(&tmio_mmc_driver); -} - -module_init(tmio_mmc_init); -module_exit(tmio_mmc_exit); +module_platform_driver(tmio_mmc_driver); MODULE_DESCRIPTION("Toshiba TMIO SD/MMC driver"); MODULE_AUTHOR("Ian Molton <spyro@f2s.com>"); diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 3020f98218f..a95e6d90172 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -105,13 +105,13 @@ static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg, unsigned long *flags) { local_irq_save(*flags); - return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; + return kmap_atomic(sg_page(sg)) + sg->offset; } static inline void tmio_mmc_kunmap_atomic(struct scatterlist *sg, unsigned long *flags, void *virt) { - kunmap_atomic(virt - sg->offset, KM_BIO_SRC_IRQ); + kunmap_atomic(virt - sg->offset); local_irq_restore(*flags); } diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 4208b395806..abad01b37cf 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -800,8 +800,7 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } else if (ios->power_mode != MMC_POWER_UP) { if (host->set_pwr && ios->power_mode == MMC_POWER_OFF) host->set_pwr(host->pdev, 0); - if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && - pdata->power) { + if (pdata->power) { pdata->power = false; pm_runtime_put(&host->pdev->dev); } @@ -915,6 +914,23 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, if (ret < 0) goto pm_disable; + /* + * There are 4 different scenarios for the card detection: + * 1) an external gpio irq handles the cd (best for power savings) + * 2) internal sdhi irq handles the cd + * 3) a worker thread polls the sdhi - indicated by MMC_CAP_NEEDS_POLL + * 4) the medium is non-removable - indicated by MMC_CAP_NONREMOVABLE + * + * While we increment the rtpm counter for all scenarios when the mmc + * core activates us by calling an appropriate set_ios(), we must + * additionally ensure that in case 2) the tmio mmc hardware stays + * powered on during runtime for the card detection to work. + */ + if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD + || mmc->caps & MMC_CAP_NEEDS_POLL + || mmc->caps & MMC_CAP_NONREMOVABLE)) + pm_runtime_get_noresume(&pdev->dev); + tmio_mmc_clk_stop(_host); tmio_mmc_reset(_host); @@ -933,12 +949,6 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, /* See if we also get DMA */ tmio_mmc_request_dma(_host, pdata); - /* We have to keep the device powered for its card detection to work */ - if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) { - pdata->power = true; - pm_runtime_get_noresume(&pdev->dev); - } - mmc_add_host(mmc); /* Unmask the IRQs we want to know about */ @@ -974,7 +984,9 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) * the controller, the runtime PM is suspended and pdata->power == false, * so, our .runtime_resume() will not try to detect a card in the slot. */ - if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD) + if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD + || host->mmc->caps & MMC_CAP_NEEDS_POLL + || host->mmc->caps & MMC_CAP_NONREMOVABLE) pm_runtime_get_sync(&pdev->dev); mmc_remove_host(host->mmc); diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index db8e8272d69..3ce99e00a49 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -315,8 +315,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, char *dst; if (reason != KMSG_DUMP_OOPS && - reason != KMSG_DUMP_PANIC && - reason != KMSG_DUMP_KEXEC) + reason != KMSG_DUMP_PANIC) return; /* Only dump oopses if dump_oops is set */ diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 64fbb002182..ead2cd16ba7 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -43,7 +43,10 @@ pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__) /* Just a debugging messages not related to any specific UBI subsystem */ -#define dbg_msg(fmt, ...) ubi_dbg_msg("msg", fmt, ##__VA_ARGS__) +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + /* General debugging messages */ #define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Messages from the eraseblock association sub-system */ diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 9ad18da1891..890754c9f32 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -306,7 +306,7 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, int copy, void *vtbl) { int err, tries = 0; - static struct ubi_vid_hdr *vid_hdr; + struct ubi_vid_hdr *vid_hdr; struct ubi_scan_leb *new_seb; ubi_msg("create volume table (copy #%d)", copy + 1); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 106b88a0473..342626f4bc4 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -99,16 +99,26 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size) /*********************** tlb specific functions ***************************/ -static inline void _lock_tx_hashtbl(struct bonding *bond) +static inline void _lock_tx_hashtbl_bh(struct bonding *bond) { spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); } -static inline void _unlock_tx_hashtbl(struct bonding *bond) +static inline void _unlock_tx_hashtbl_bh(struct bonding *bond) { spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); } +static inline void _lock_tx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +static inline void _unlock_tx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + /* Caller must hold tx_hashtbl lock */ static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) { @@ -129,14 +139,13 @@ static inline void tlb_init_slave(struct slave *slave) SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; } -/* Caller must hold bond lock for read */ -static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) +/* Caller must hold bond lock for read, BH disabled */ +static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, + int save_load) { struct tlb_client_info *tx_hash_table; u32 index; - _lock_tx_hashtbl(bond); - /* clear slave from tx_hashtbl */ tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; @@ -151,8 +160,15 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_ } tlb_init_slave(slave); +} - _unlock_tx_hashtbl(bond); +/* Caller must hold bond lock for read */ +static void tlb_clear_slave(struct bonding *bond, struct slave *slave, + int save_load) +{ + _lock_tx_hashtbl_bh(bond); + __tlb_clear_slave(bond, slave, save_load); + _unlock_tx_hashtbl_bh(bond); } /* Must be called before starting the monitor timer */ @@ -169,7 +185,7 @@ static int tlb_initialize(struct bonding *bond) bond->dev->name); return -1; } - _lock_tx_hashtbl(bond); + _lock_tx_hashtbl_bh(bond); bond_info->tx_hashtbl = new_hashtbl; @@ -177,7 +193,7 @@ static int tlb_initialize(struct bonding *bond) tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); } - _unlock_tx_hashtbl(bond); + _unlock_tx_hashtbl_bh(bond); return 0; } @@ -187,12 +203,12 @@ static void tlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - _lock_tx_hashtbl(bond); + _lock_tx_hashtbl_bh(bond); kfree(bond_info->tx_hashtbl); bond_info->tx_hashtbl = NULL; - _unlock_tx_hashtbl(bond); + _unlock_tx_hashtbl_bh(bond); } static long long compute_gap(struct slave *slave) @@ -226,15 +242,13 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) return least_loaded; } -/* Caller must hold bond lock for read */ -static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) +static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, + u32 skb_len) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct tlb_client_info *hash_table; struct slave *assigned_slave; - _lock_tx_hashtbl(bond); - hash_table = bond_info->tx_hashtbl; assigned_slave = hash_table[hash_index].tx_slave; if (!assigned_slave) { @@ -263,22 +277,46 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3 hash_table[hash_index].tx_bytes += skb_len; } - _unlock_tx_hashtbl(bond); - return assigned_slave; } +/* Caller must hold bond lock for read */ +static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, + u32 skb_len) +{ + struct slave *tx_slave; + /* + * We don't need to disable softirq here, becase + * tlb_choose_channel() is only called by bond_alb_xmit() + * which already has softirq disabled. + */ + _lock_tx_hashtbl(bond); + tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); + _unlock_tx_hashtbl(bond); + return tx_slave; +} + /*********************** rlb specific functions ***************************/ -static inline void _lock_rx_hashtbl(struct bonding *bond) +static inline void _lock_rx_hashtbl_bh(struct bonding *bond) { spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); } -static inline void _unlock_rx_hashtbl(struct bonding *bond) +static inline void _unlock_rx_hashtbl_bh(struct bonding *bond) { spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); } +static inline void _lock_rx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +static inline void _unlock_rx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + /* when an ARP REPLY is received from a client update its info * in the rx_hashtbl */ @@ -288,7 +326,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -303,7 +341,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) bond_info->rx_ntt = 1; } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond, @@ -401,7 +439,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) u32 index, next_index; /* clear slave from rx_hashtbl */ - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); rx_hash_table = bond_info->rx_hashtbl; index = bond_info->rx_hashtbl_head; @@ -432,7 +470,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) } } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); write_lock_bh(&bond->curr_slave_lock); @@ -489,7 +527,7 @@ static void rlb_update_rx_clients(struct bonding *bond) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); hash_index = bond_info->rx_hashtbl_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { @@ -507,7 +545,7 @@ static void rlb_update_rx_clients(struct bonding *bond) */ bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } /* The slave was assigned a new mac address - update the clients */ @@ -518,7 +556,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla int ntt = 0; u32 hash_index; - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); hash_index = bond_info->rx_hashtbl_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { @@ -538,7 +576,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } /* mark all clients using src_ip to be updated */ @@ -709,7 +747,7 @@ static void rlb_rebalance(struct bonding *bond) int ntt; u32 hash_index; - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); ntt = 0; hash_index = bond_info->rx_hashtbl_head; @@ -727,7 +765,7 @@ static void rlb_rebalance(struct bonding *bond) if (ntt) { bond_info->rx_ntt = 1; } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } /* Caller must hold rx_hashtbl lock */ @@ -751,7 +789,7 @@ static int rlb_initialize(struct bonding *bond) bond->dev->name); return -1; } - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); bond_info->rx_hashtbl = new_hashtbl; @@ -761,7 +799,7 @@ static int rlb_initialize(struct bonding *bond) rlb_init_table_entry(bond_info->rx_hashtbl + i); } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); /* register to receive ARPs */ bond->recv_probe = rlb_arp_recv; @@ -773,13 +811,13 @@ static void rlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); kfree(bond_info->rx_hashtbl); bond_info->rx_hashtbl = NULL; bond_info->rx_hashtbl_head = RLB_NULL_INDEX; - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) @@ -787,7 +825,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 curr_index; - _lock_rx_hashtbl(bond); + _lock_rx_hashtbl_bh(bond); curr_index = bond_info->rx_hashtbl_head; while (curr_index != RLB_NULL_INDEX) { @@ -812,7 +850,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) curr_index = next_index; } - _unlock_rx_hashtbl(bond); + _unlock_rx_hashtbl_bh(bond); } /*********************** tlb/rlb shared functions *********************/ @@ -1320,7 +1358,9 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); } else { if (tx_slave) { - tlb_clear_slave(bond, tx_slave, 0); + _lock_tx_hashtbl(bond); + __tlb_clear_slave(bond, tx_slave, 0); + _unlock_tx_hashtbl(bond); } } diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 9e8ba4f5636..0f92e3567f6 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -623,7 +623,8 @@ static int ax_mii_init(struct net_device *dev) ax->mii_bus->name = "ax88796_mii_bus"; ax->mii_bus->parent = dev->dev.parent; - snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); + snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); ax->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); if (!ax->mii_bus->irq) { diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index b6d69c91db9..d812a103e03 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1670,7 +1670,8 @@ static int __devinit bfin_mii_bus_probe(struct platform_device *pdev) miibus->name = "bfin_mii_bus"; miibus->phy_mask = mii_bus_pd->phy_mask; - snprintf(miibus->id, MII_BUS_ID_SIZE, "0"); + snprintf(miibus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); miibus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); if (!miibus->irq) goto out_err_irq_alloc; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index cc9262be69c..8b95dd31425 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1171,7 +1171,8 @@ static int __devinit au1000_probe(struct platform_device *pdev) aup->mii_bus->write = au1000_mdiobus_write; aup->mii_bus->reset = au1000_mdiobus_reset; aup->mii_bus->name = "au1000_eth_mii"; - snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%x", aup->mac_id); + snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, aup->mac_id); aup->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); if (aup->mii_bus->irq == NULL) goto err_out; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index d44331eb07f..986019b2c84 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1727,7 +1727,7 @@ static int __devinit bcm_enet_probe(struct platform_device *pdev) bus->priv = priv; bus->read = bcm_enet_mdio_read_phylib; bus->write = bcm_enet_mdio_write_phylib; - sprintf(bus->id, "%d", priv->mac_id); + sprintf(bus->id, "%s-%d", pdev->name, priv->mac_id); /* only probe bus where we think the PHY is, because * the mdio read operation return 0 instead of 0xffff diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 8fa7abc53ec..084904ceaa3 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2259,7 +2259,8 @@ static int sbmac_init(struct platform_device *pldev, long long base) } sc->mii_bus->name = sbmac_mdio_string; - snprintf(sc->mii_bus->id, MII_BUS_ID_SIZE, "%x", idx); + snprintf(sc->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pldev->name, idx); sc->mii_bus->priv = sc; sc->mii_bus->read = sbmac_mii_read; sc->mii_bus->write = sbmac_mii_write; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index f3d5c65d99c..23200680d4c 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -243,7 +243,8 @@ static int macb_mii_init(struct macb *bp) bp->mii_bus->read = &macb_mdio_read; bp->mii_bus->write = &macb_mdio_write; bp->mii_bus->reset = &macb_mdio_reset; - snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%x", bp->pdev->id); + snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + bp->pdev->name, bp->pdev->id); bp->mii_bus->priv = bp; bp->mii_bus->parent = &bp->dev->dev; pdata = bp->pdev->dev.platform_data; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index ce88c0f399f..925c9bafc9b 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -325,7 +325,8 @@ static int dnet_mii_init(struct dnet *bp) bp->mii_bus->write = &dnet_mdio_write; bp->mii_bus->reset = &dnet_mdio_reset; - snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0); + snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + bp->pdev->name, bp->pdev->id); bp->mii_bus->priv = bp; diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index ddcbbb34d1b..7b25e9cf13f 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -476,6 +476,7 @@ fec_restart(struct net_device *ndev, int duplex) } else { #ifdef FEC_MIIGSK_ENR if (id_entry->driver_data & FEC_QUIRK_USE_GASKET) { + u32 cfgr; /* disable the gasket and wait */ writel(0, fep->hwp + FEC_MIIGSK_ENR); while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4) @@ -486,9 +487,11 @@ fec_restart(struct net_device *ndev, int duplex) * RMII, 50 MHz, no loopback, no echo * MII, 25 MHz, no loopback, no echo */ - writel((fep->phy_interface == PHY_INTERFACE_MODE_RMII) ? - 1 : 0, fep->hwp + FEC_MIIGSK_CFGR); - + cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII) + ? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII; + if (fep->phy_dev && fep->phy_dev->speed == SPEED_10) + cfgr |= BM_MIIGSK_CFGR_FRCONT_10M; + writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR); /* re-enable the gasket */ writel(2, fep->hwp + FEC_MIIGSK_ENR); @@ -1077,7 +1080,8 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->read = fec_enet_mdio_read; fep->mii_bus->write = fec_enet_mdio_write; fep->mii_bus->reset = fec_enet_mdio_reset; - snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", fep->dev_id + 1); + snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, fep->dev_id + 1); fep->mii_bus->priv = fep; fep->mii_bus->parent = &pdev->dev; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 8b2c6d797e6..8408c627b19 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -47,6 +47,10 @@ #define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */ #define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */ +#define BM_MIIGSK_CFGR_MII 0x00 +#define BM_MIIGSK_CFGR_RMII 0x01 +#define BM_MIIGSK_CFGR_FRCONT_10M 0x40 + #else #define FEC_ECNTRL 0x000 /* Ethernet control reg */ diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index e01cdaa722a..39d160d353a 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1984,7 +1984,8 @@ static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb) return fcb; } -static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb) +static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb, + int fcb_length) { u8 flags = 0; @@ -2006,7 +2007,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb) * frame (skb->data) and the start of the IP hdr. * l4os is the distance between the start of the * l3 hdr and the l4 hdr */ - fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN); + fcb->l3os = (u16)(skb_network_offset(skb) - fcb_length); fcb->l4os = skb_network_header_len(skb); fcb->flags = flags; @@ -2046,7 +2047,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rq = 0, do_tstamp = 0; u32 bufaddr; unsigned long flags; - unsigned int nr_frags, nr_txbds, length; + unsigned int nr_frags, nr_txbds, length, fcb_length = GMAC_FCB_LEN; /* * TOE=1 frames larger than 2500 bytes may see excess delays @@ -2070,22 +2071,28 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* check if time stamp should be generated */ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - priv->hwts_tx_en)) + priv->hwts_tx_en)) { do_tstamp = 1; + fcb_length = GMAC_FCB_LEN + GMAC_TXPAL_LEN; + } /* make space for additional header when fcb is needed */ if (((skb->ip_summed == CHECKSUM_PARTIAL) || vlan_tx_tag_present(skb) || unlikely(do_tstamp)) && - (skb_headroom(skb) < GMAC_FCB_LEN)) { + (skb_headroom(skb) < fcb_length)) { struct sk_buff *skb_new; - skb_new = skb_realloc_headroom(skb, GMAC_FCB_LEN); + skb_new = skb_realloc_headroom(skb, fcb_length); if (!skb_new) { dev->stats.tx_errors++; kfree_skb(skb); return NETDEV_TX_OK; } + + /* Steal sock reference for processing TX time stamps */ + swap(skb_new->sk, skb->sk); + swap(skb_new->destructor, skb->destructor); kfree_skb(skb); skb = skb_new; } @@ -2154,6 +2161,12 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) lstatus = txbdp_start->lstatus; } + /* Add TxPAL between FCB and frame if required */ + if (unlikely(do_tstamp)) { + skb_push(skb, GMAC_TXPAL_LEN); + memset(skb->data, 0, GMAC_TXPAL_LEN); + } + /* Set up checksumming */ if (CHECKSUM_PARTIAL == skb->ip_summed) { fcb = gfar_add_fcb(skb); @@ -2164,7 +2177,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb); } else { lstatus |= BD_LFLAG(TXBD_TOE); - gfar_tx_checksum(skb, fcb); + gfar_tx_checksum(skb, fcb, fcb_length); } } @@ -2196,9 +2209,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) * the full frame length. */ if (unlikely(do_tstamp)) { - txbdp_tstamp->bufPtr = txbdp_start->bufPtr + GMAC_FCB_LEN; + txbdp_tstamp->bufPtr = txbdp_start->bufPtr + fcb_length; txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_READY) | - (skb_headlen(skb) - GMAC_FCB_LEN); + (skb_headlen(skb) - fcb_length); lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN; } else { lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb); @@ -2490,7 +2503,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { next = next_txbd(bdp, base, tx_ring_size); - buflen = next->length + GMAC_FCB_LEN; + buflen = next->length + GMAC_FCB_LEN + GMAC_TXPAL_LEN; } else buflen = bdp->length; @@ -2502,6 +2515,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(*ns); + skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN); skb_tstamp_tx(skb, &shhwtstamps); bdp->lstatus &= BD_LFLAG(TXBD_WRAP); bdp = next; diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index fe7ac3a8319..40c33a7554c 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -63,6 +63,9 @@ struct ethtool_rx_list { /* Length for FCB */ #define GMAC_FCB_LEN 8 +/* Length for TxPAL */ +#define GMAC_TXPAL_LEN 16 + /* Default padding amount */ #define DEFAULT_PADDING 2 diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 0b3567ab812..85e2c6cd970 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -98,6 +98,7 @@ struct ltq_etop_chan { struct ltq_etop_priv { struct net_device *netdev; + struct platform_device *pdev; struct ltq_eth_data *pldata; struct resource *res; @@ -436,7 +437,8 @@ ltq_etop_mdio_init(struct net_device *dev) priv->mii_bus->read = ltq_etop_mdio_rd; priv->mii_bus->write = ltq_etop_mdio_wr; priv->mii_bus->name = "ltq_mii"; - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0); + snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + priv->pdev->name, priv->pdev->id); priv->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); if (!priv->mii_bus->irq) { err = -ENOMEM; @@ -734,6 +736,7 @@ ltq_etop_probe(struct platform_device *pdev) dev->ethtool_ops = <q_etop_ethtool_ops; priv = netdev_priv(dev); priv->res = res; + priv->pdev = pdev; priv->pldata = dev_get_platdata(&pdev->dev); priv->netdev = dev; spin_lock_init(&priv->lock); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 80aab4e5d69..9c049d2cb97 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2613,7 +2613,8 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) msp->smi_bus->name = "mv643xx_eth smi"; msp->smi_bus->read = smi_bus_read; msp->smi_bus->write = smi_bus_write, - snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id); + snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d", + pdev->name, pdev->id); msp->smi_bus->parent = &pdev->dev; msp->smi_bus->phy_mask = 0xffffffff; if (mdiobus_register(msp->smi_bus) < 0) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 5ec409e3da0..953ba5851f7 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1552,7 +1552,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) pep->smi_bus->name = "pxa168_eth smi"; pep->smi_bus->read = pxa168_smi_read; pep->smi_bus->write = pxa168_smi_write; - snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id); + snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d", + pdev->name, pdev->id); pep->smi_bus->parent = &pdev->dev; pep->smi_bus->phy_mask = 0xffffffff; err = mdiobus_register(pep->smi_bus); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 6ed09a85f03..e52cd310ae7 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -746,7 +746,7 @@ #define MAC_ADDR_ORDER(i) (ETH_ALEN - 1 - (i)) #define MAX_ETHERNET_BODY_SIZE 1500 -#define ETHERNET_HEADER_SIZE 14 +#define ETHERNET_HEADER_SIZE (14 + VLAN_HLEN) #define MAX_ETHERNET_PACKET_SIZE \ (MAX_ETHERNET_BODY_SIZE + ETHERNET_HEADER_SIZE) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index fc9bda9bc36..6ece4295d78 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1702,7 +1702,8 @@ static int sh_mdio_init(struct net_device *ndev, int id, /* Hook up MII support for ethtool */ mdp->mii_bus->name = "sh_mii"; mdp->mii_bus->parent = &ndev->dev; - snprintf(mdp->mii_bus->id, MII_BUS_ID_SIZE, "%x", id); + snprintf(mdp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + mdp->pdev->name, pdid); /* PHY IRQ */ mdp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c index a7ff8ea342b..22e9c0181ce 100644 --- a/drivers/net/ethernet/s6gmac.c +++ b/drivers/net/ethernet/s6gmac.c @@ -1004,7 +1004,7 @@ static int __devinit s6gmac_probe(struct platform_device *pdev) mb->write = s6mii_write; mb->reset = s6mii_reset; mb->priv = pd; - snprintf(mb->id, MII_BUS_ID_SIZE, "0"); + snprintf(mb->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, pdev->id); mb->phy_mask = ~(1 << 0); mb->irq = &pd->mii.irq[0]; for (i = 0; i < PHY_MAX_ADDR; i++) { diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 9d0b8ced023..24d2df068d7 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1044,7 +1044,8 @@ static int __devinit smsc911x_mii_init(struct platform_device *pdev, } pdata->mii_bus->name = SMSC_MDIONAME; - snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); + snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); pdata->mii_bus->priv = pdata; pdata->mii_bus->read = smsc911x_mii_read; pdata->mii_bus->write = smsc911x_mii_write; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 41e6b33e1b0..c07cfe989f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -22,6 +22,7 @@ Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> *******************************************************************************/ +#include <linux/kernel.h> #include <linux/io.h> #include "mmc.h" diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3738b470054..96fa2da3076 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -307,7 +307,7 @@ static int stmmac_init_phy(struct net_device *dev) priv->speed = 0; priv->oldduplex = -1; - snprintf(bus_id, MII_BUS_ID_SIZE, "%x", priv->plat->bus_id); + snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", priv->plat->bus_id); snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, priv->plat->phy_addr); pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id); @@ -772,7 +772,7 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) dwmac_mmc_ctrl(priv->ioaddr, mode); memset(&priv->mmc, 0, sizeof(struct stmmac_counters)); } else - pr_info(" No MAC Management Counters available"); + pr_info(" No MAC Management Counters available\n"); } static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 51f44123396..da4a1042523 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -158,7 +158,8 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->read = &stmmac_mdio_read; new_bus->write = &stmmac_mdio_write; new_bus->reset = &stmmac_mdio_reset; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", mdio_bus_data->bus_id); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", + new_bus->name, mdio_bus_data->bus_id); new_bus->priv = ndev; new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 7b1594f4944..1ac83243649 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -62,7 +62,7 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) priv = stmmac_dvr_probe(&(pdev->dev), plat_dat); if (!priv) { pr_err("%s: main drivr probe failed", __func__); - goto out_release_region; + goto out_unmap; } priv->ioaddr = addr; diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index aaac0c7ad11..4d9a28ffd3c 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1269,7 +1269,7 @@ int __devinit cpmac_init(void) } cpmac_mii->phy_mask = ~(mask | 0x80000000); - snprintf(cpmac_mii->id, MII_BUS_ID_SIZE, "1"); + snprintf(cpmac_mii->id, MII_BUS_ID_SIZE, "cpmac-1"); res = mdiobus_register(cpmac_mii); if (res) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 7615040df75..ef7c9c17bff 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -313,7 +313,8 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) data->bus->reset = davinci_mdio_reset, data->bus->parent = dev; data->bus->priv = data; - snprintf(data->bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); data->clk = clk_get(dev, NULL); if (IS_ERR(data->clk)) { diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index a9ce01bafd2..164fb775d7b 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1604,7 +1604,7 @@ tsi108_init_one(struct platform_device *pdev) data->phyregs = ioremap(einfo->phyregs, 0x400); if (NULL == data->phyregs) { err = -ENOMEM; - goto regs_fail; + goto phyregs_fail; } /* MII setup */ data->mii_if.dev = dev; @@ -1663,9 +1663,11 @@ tsi108_init_one(struct platform_device *pdev) return 0; register_fail: - iounmap(data->regs); iounmap(data->phyregs); +phyregs_fail: + iounmap(data->regs); + regs_fail: free_netdev(dev); return err; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 5c4983b2870..10b18eb63d2 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -39,10 +39,9 @@ /* A few user-configurable values. These may be modified when a driver module is loaded. */ - -#define DEBUG -static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ -static int max_interrupt_work = 20; +static int debug = 0; +#define RHINE_MSG_DEFAULT \ + (0x0000) /* Set the copy breakpoint for the copy-only-tiny-frames scheme. Setting to > 1518 effectively disables this feature. */ @@ -128,12 +127,10 @@ MODULE_AUTHOR("Donald Becker <becker@scyld.com>"); MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver"); MODULE_LICENSE("GPL"); -module_param(max_interrupt_work, int, 0); module_param(debug, int, 0); module_param(rx_copybreak, int, 0); module_param(avoid_D3, bool, 0); -MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt"); -MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)"); +MODULE_PARM_DESC(debug, "VIA Rhine debug message flags"); MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames"); MODULE_PARM_DESC(avoid_D3, "Avoid power state D3 (work-around for broken BIOSes)"); @@ -351,16 +348,25 @@ static const int mmio_verify_registers[] = { /* Bits in the interrupt status/mask registers. */ enum intr_status_bits { - IntrRxDone=0x0001, IntrRxErr=0x0004, IntrRxEmpty=0x0020, - IntrTxDone=0x0002, IntrTxError=0x0008, IntrTxUnderrun=0x0210, - IntrPCIErr=0x0040, - IntrStatsMax=0x0080, IntrRxEarly=0x0100, - IntrRxOverflow=0x0400, IntrRxDropped=0x0800, IntrRxNoBuf=0x1000, - IntrTxAborted=0x2000, IntrLinkChange=0x4000, - IntrRxWakeUp=0x8000, - IntrNormalSummary=0x0003, IntrAbnormalSummary=0xC260, - IntrTxDescRace=0x080000, /* mapped from IntrStatus2 */ - IntrTxErrSummary=0x082218, + IntrRxDone = 0x0001, + IntrTxDone = 0x0002, + IntrRxErr = 0x0004, + IntrTxError = 0x0008, + IntrRxEmpty = 0x0020, + IntrPCIErr = 0x0040, + IntrStatsMax = 0x0080, + IntrRxEarly = 0x0100, + IntrTxUnderrun = 0x0210, + IntrRxOverflow = 0x0400, + IntrRxDropped = 0x0800, + IntrRxNoBuf = 0x1000, + IntrTxAborted = 0x2000, + IntrLinkChange = 0x4000, + IntrRxWakeUp = 0x8000, + IntrTxDescRace = 0x080000, /* mapped from IntrStatus2 */ + IntrNormalSummary = IntrRxDone | IntrTxDone, + IntrTxErrSummary = IntrTxDescRace | IntrTxAborted | IntrTxError | + IntrTxUnderrun, }; /* Bits in WOLcrSet/WOLcrClr and PwrcsrSet/PwrcsrClr */ @@ -439,8 +445,13 @@ struct rhine_private { struct net_device *dev; struct napi_struct napi; spinlock_t lock; + struct mutex task_lock; + bool task_enable; + struct work_struct slow_event_task; struct work_struct reset_task; + u32 msg_enable; + /* Frequently used values: keep some adjacent for cache effect. */ u32 quirks; struct rx_desc *rx_head_desc; @@ -476,41 +487,50 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int rhine_open(struct net_device *dev); static void rhine_reset_task(struct work_struct *work); +static void rhine_slow_event_task(struct work_struct *work); static void rhine_tx_timeout(struct net_device *dev); static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); static void rhine_tx(struct net_device *dev); static int rhine_rx(struct net_device *dev, int limit); -static void rhine_error(struct net_device *dev, int intr_status); static void rhine_set_rx_mode(struct net_device *dev); static struct net_device_stats *rhine_get_stats(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct ethtool_ops netdev_ethtool_ops; static int rhine_close(struct net_device *dev); -static void rhine_shutdown (struct pci_dev *pdev); static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); -static void rhine_set_cam(void __iomem *ioaddr, int idx, u8 *addr); -static void rhine_set_vlan_cam(void __iomem *ioaddr, int idx, u8 *addr); -static void rhine_set_cam_mask(void __iomem *ioaddr, u32 mask); -static void rhine_set_vlan_cam_mask(void __iomem *ioaddr, u32 mask); -static void rhine_init_cam_filter(struct net_device *dev); -static void rhine_update_vcam(struct net_device *dev); - -#define RHINE_WAIT_FOR(condition) \ -do { \ - int i = 1024; \ - while (!(condition) && --i) \ - ; \ - if (debug > 1 && i < 512) \ - pr_info("%4d cycles used @ %s:%d\n", \ - 1024 - i, __func__, __LINE__); \ -} while (0) - -static inline u32 get_intr_status(struct net_device *dev) +static void rhine_restart_tx(struct net_device *dev); + +static void rhine_wait_bit(struct rhine_private *rp, u8 reg, u8 mask, bool high) +{ + void __iomem *ioaddr = rp->base; + int i; + + for (i = 0; i < 1024; i++) { + if (high ^ !!(ioread8(ioaddr + reg) & mask)) + break; + udelay(10); + } + if (i > 64) { + netif_dbg(rp, hw, rp->dev, "%s bit wait (%02x/%02x) cycle " + "count: %04d\n", high ? "high" : "low", reg, mask, i); + } +} + +static void rhine_wait_bit_high(struct rhine_private *rp, u8 reg, u8 mask) +{ + rhine_wait_bit(rp, reg, mask, true); +} + +static void rhine_wait_bit_low(struct rhine_private *rp, u8 reg, u8 mask) +{ + rhine_wait_bit(rp, reg, mask, false); +} + +static u32 rhine_get_events(struct rhine_private *rp) { - struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; u32 intr_status; @@ -521,6 +541,16 @@ static inline u32 get_intr_status(struct net_device *dev) return intr_status; } +static void rhine_ack_events(struct rhine_private *rp, u32 mask) +{ + void __iomem *ioaddr = rp->base; + + if (rp->quirks & rqStatusWBRace) + iowrite8(mask >> 16, ioaddr + IntrStatus2); + iowrite16(mask, ioaddr + IntrStatus); + mmiowb(); +} + /* * Get power related registers into sane state. * Notify user about past WOL event. @@ -585,6 +615,7 @@ static void rhine_chip_reset(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; + u8 cmd1; iowrite8(Cmd1Reset, ioaddr + ChipCmd1); IOSYNC; @@ -597,13 +628,12 @@ static void rhine_chip_reset(struct net_device *dev) iowrite8(0x40, ioaddr + MiscCmd); /* Reset can take somewhat longer (rare) */ - RHINE_WAIT_FOR(!(ioread8(ioaddr + ChipCmd1) & Cmd1Reset)); + rhine_wait_bit_low(rp, ChipCmd1, Cmd1Reset); } - if (debug > 1) - netdev_info(dev, "Reset %s\n", - (ioread8(ioaddr + ChipCmd1) & Cmd1Reset) ? - "failed" : "succeeded"); + cmd1 = ioread8(ioaddr + ChipCmd1); + netif_info(rp, hw, dev, "Reset %s\n", (cmd1 & Cmd1Reset) ? + "failed" : "succeeded"); } #ifdef USE_MMIO @@ -629,9 +659,15 @@ static void __devinit rhine_reload_eeprom(long pioaddr, struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; + int i; outb(0x20, pioaddr + MACRegEEcsr); - RHINE_WAIT_FOR(!(inb(pioaddr + MACRegEEcsr) & 0x20)); + for (i = 0; i < 1024; i++) { + if (!(inb(pioaddr + MACRegEEcsr) & 0x20)) + break; + } + if (i > 512) + pr_info("%4d cycles used @ %s:%d\n", i, __func__, __LINE__); #ifdef USE_MMIO /* @@ -657,23 +693,127 @@ static void rhine_poll(struct net_device *dev) } #endif +static void rhine_kick_tx_threshold(struct rhine_private *rp) +{ + if (rp->tx_thresh < 0xe0) { + void __iomem *ioaddr = rp->base; + + rp->tx_thresh += 0x20; + BYTE_REG_BITS_SET(rp->tx_thresh, 0x80, ioaddr + TxConfig); + } +} + +static void rhine_tx_err(struct rhine_private *rp, u32 status) +{ + struct net_device *dev = rp->dev; + + if (status & IntrTxAborted) { + netif_info(rp, tx_err, dev, + "Abort %08x, frame dropped\n", status); + } + + if (status & IntrTxUnderrun) { + rhine_kick_tx_threshold(rp); + netif_info(rp, tx_err ,dev, "Transmitter underrun, " + "Tx threshold now %02x\n", rp->tx_thresh); + } + + if (status & IntrTxDescRace) + netif_info(rp, tx_err, dev, "Tx descriptor write-back race\n"); + + if ((status & IntrTxError) && + (status & (IntrTxAborted | IntrTxUnderrun | IntrTxDescRace)) == 0) { + rhine_kick_tx_threshold(rp); + netif_info(rp, tx_err, dev, "Unspecified error. " + "Tx threshold now %02x\n", rp->tx_thresh); + } + + rhine_restart_tx(dev); +} + +static void rhine_update_rx_crc_and_missed_errord(struct rhine_private *rp) +{ + void __iomem *ioaddr = rp->base; + struct net_device_stats *stats = &rp->dev->stats; + + stats->rx_crc_errors += ioread16(ioaddr + RxCRCErrs); + stats->rx_missed_errors += ioread16(ioaddr + RxMissed); + + /* + * Clears the "tally counters" for CRC errors and missed frames(?). + * It has been reported that some chips need a write of 0 to clear + * these, for others the counters are set to 1 when written to and + * instead cleared when read. So we clear them both ways ... + */ + iowrite32(0, ioaddr + RxMissed); + ioread16(ioaddr + RxCRCErrs); + ioread16(ioaddr + RxMissed); +} + +#define RHINE_EVENT_NAPI_RX (IntrRxDone | \ + IntrRxErr | \ + IntrRxEmpty | \ + IntrRxOverflow | \ + IntrRxDropped | \ + IntrRxNoBuf | \ + IntrRxWakeUp) + +#define RHINE_EVENT_NAPI_TX_ERR (IntrTxError | \ + IntrTxAborted | \ + IntrTxUnderrun | \ + IntrTxDescRace) +#define RHINE_EVENT_NAPI_TX (IntrTxDone | RHINE_EVENT_NAPI_TX_ERR) + +#define RHINE_EVENT_NAPI (RHINE_EVENT_NAPI_RX | \ + RHINE_EVENT_NAPI_TX | \ + IntrStatsMax) +#define RHINE_EVENT_SLOW (IntrPCIErr | IntrLinkChange) +#define RHINE_EVENT (RHINE_EVENT_NAPI | RHINE_EVENT_SLOW) + static int rhine_napipoll(struct napi_struct *napi, int budget) { struct rhine_private *rp = container_of(napi, struct rhine_private, napi); struct net_device *dev = rp->dev; void __iomem *ioaddr = rp->base; - int work_done; + u16 enable_mask = RHINE_EVENT & 0xffff; + int work_done = 0; + u32 status; + + status = rhine_get_events(rp); + rhine_ack_events(rp, status & ~RHINE_EVENT_SLOW); + + if (status & RHINE_EVENT_NAPI_RX) + work_done += rhine_rx(dev, budget); + + if (status & RHINE_EVENT_NAPI_TX) { + if (status & RHINE_EVENT_NAPI_TX_ERR) { + /* Avoid scavenging before Tx engine turned off */ + rhine_wait_bit_low(rp, ChipCmd, CmdTxOn); + if (ioread8(ioaddr + ChipCmd) & CmdTxOn) + netif_warn(rp, tx_err, dev, "Tx still on\n"); + } - work_done = rhine_rx(dev, budget); + rhine_tx(dev); + + if (status & RHINE_EVENT_NAPI_TX_ERR) + rhine_tx_err(rp, status); + } + + if (status & IntrStatsMax) { + spin_lock(&rp->lock); + rhine_update_rx_crc_and_missed_errord(rp); + spin_unlock(&rp->lock); + } + + if (status & RHINE_EVENT_SLOW) { + enable_mask &= ~RHINE_EVENT_SLOW; + schedule_work(&rp->slow_event_task); + } if (work_done < budget) { napi_complete(napi); - - iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | - IntrRxDropped | IntrRxNoBuf | IntrTxAborted | - IntrTxDone | IntrTxError | IntrTxUnderrun | - IntrPCIErr | IntrStatsMax | IntrLinkChange, - ioaddr + IntrEnable); + iowrite16(enable_mask, ioaddr + IntrEnable); + mmiowb(); } return work_done; } @@ -797,6 +937,7 @@ static int __devinit rhine_init_one(struct pci_dev *pdev, rp->quirks = quirks; rp->pioaddr = pioaddr; rp->pdev = pdev; + rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT); rc = pci_request_regions(pdev, DRV_NAME); if (rc) @@ -856,7 +997,9 @@ static int __devinit rhine_init_one(struct pci_dev *pdev, dev->irq = pdev->irq; spin_lock_init(&rp->lock); + mutex_init(&rp->task_lock); INIT_WORK(&rp->reset_task, rhine_reset_task); + INIT_WORK(&rp->slow_event_task, rhine_slow_event_task); rp->mii_if.dev = dev; rp->mii_if.mdio_read = mdio_read; @@ -916,8 +1059,8 @@ static int __devinit rhine_init_one(struct pci_dev *pdev, } } rp->mii_if.phy_id = phy_id; - if (debug > 1 && avoid_D3) - netdev_info(dev, "No D3 power state at shutdown\n"); + if (avoid_D3) + netif_info(rp, probe, dev, "No D3 power state at shutdown\n"); return 0; @@ -1093,7 +1236,7 @@ static void rhine_check_media(struct net_device *dev, unsigned int init_media) struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; - mii_check_media(&rp->mii_if, debug, init_media); + mii_check_media(&rp->mii_if, netif_msg_link(rp), init_media); if (rp->mii_if.full_duplex) iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1FDuplex, @@ -1101,24 +1244,26 @@ static void rhine_check_media(struct net_device *dev, unsigned int init_media) else iowrite8(ioread8(ioaddr + ChipCmd1) & ~Cmd1FDuplex, ioaddr + ChipCmd1); - if (debug > 1) - netdev_info(dev, "force_media %d, carrier %d\n", - rp->mii_if.force_media, netif_carrier_ok(dev)); + + netif_info(rp, link, dev, "force_media %d, carrier %d\n", + rp->mii_if.force_media, netif_carrier_ok(dev)); } /* Called after status of force_media possibly changed */ static void rhine_set_carrier(struct mii_if_info *mii) { + struct net_device *dev = mii->dev; + struct rhine_private *rp = netdev_priv(dev); + if (mii->force_media) { /* autoneg is off: Link is always assumed to be up */ - if (!netif_carrier_ok(mii->dev)) - netif_carrier_on(mii->dev); - } - else /* Let MMI library update carrier status */ - rhine_check_media(mii->dev, 0); - if (debug > 1) - netdev_info(mii->dev, "force_media %d, carrier %d\n", - mii->force_media, netif_carrier_ok(mii->dev)); + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else /* Let MMI library update carrier status */ + rhine_check_media(dev, 0); + + netif_info(rp, link, dev, "force_media %d, carrier %d\n", + mii->force_media, netif_carrier_ok(dev)); } /** @@ -1266,10 +1411,10 @@ static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) { struct rhine_private *rp = netdev_priv(dev); - spin_lock_irq(&rp->lock); + spin_lock_bh(&rp->lock); set_bit(vid, rp->active_vlans); rhine_update_vcam(dev); - spin_unlock_irq(&rp->lock); + spin_unlock_bh(&rp->lock); return 0; } @@ -1277,10 +1422,10 @@ static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) { struct rhine_private *rp = netdev_priv(dev); - spin_lock_irq(&rp->lock); + spin_lock_bh(&rp->lock); clear_bit(vid, rp->active_vlans); rhine_update_vcam(dev); - spin_unlock_irq(&rp->lock); + spin_unlock_bh(&rp->lock); return 0; } @@ -1310,12 +1455,7 @@ static void init_registers(struct net_device *dev) napi_enable(&rp->napi); - /* Enable interrupts by setting the interrupt mask. */ - iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | - IntrRxDropped | IntrRxNoBuf | IntrTxAborted | - IntrTxDone | IntrTxError | IntrTxUnderrun | - IntrPCIErr | IntrStatsMax | IntrLinkChange, - ioaddr + IntrEnable); + iowrite16(RHINE_EVENT & 0xffff, ioaddr + IntrEnable); iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8), ioaddr + ChipCmd); @@ -1323,23 +1463,27 @@ static void init_registers(struct net_device *dev) } /* Enable MII link status auto-polling (required for IntrLinkChange) */ -static void rhine_enable_linkmon(void __iomem *ioaddr) +static void rhine_enable_linkmon(struct rhine_private *rp) { + void __iomem *ioaddr = rp->base; + iowrite8(0, ioaddr + MIICmd); iowrite8(MII_BMSR, ioaddr + MIIRegAddr); iowrite8(0x80, ioaddr + MIICmd); - RHINE_WAIT_FOR((ioread8(ioaddr + MIIRegAddr) & 0x20)); + rhine_wait_bit_high(rp, MIIRegAddr, 0x20); iowrite8(MII_BMSR | 0x40, ioaddr + MIIRegAddr); } /* Disable MII link status auto-polling (required for MDIO access) */ -static void rhine_disable_linkmon(void __iomem *ioaddr, u32 quirks) +static void rhine_disable_linkmon(struct rhine_private *rp) { + void __iomem *ioaddr = rp->base; + iowrite8(0, ioaddr + MIICmd); - if (quirks & rqRhineI) { + if (rp->quirks & rqRhineI) { iowrite8(0x01, ioaddr + MIIRegAddr); // MII_BMSR /* Can be called from ISR. Evil. */ @@ -1348,13 +1492,13 @@ static void rhine_disable_linkmon(void __iomem *ioaddr, u32 quirks) /* 0x80 must be set immediately before turning it off */ iowrite8(0x80, ioaddr + MIICmd); - RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x20); + rhine_wait_bit_high(rp, MIIRegAddr, 0x20); /* Heh. Now clear 0x80 again. */ iowrite8(0, ioaddr + MIICmd); } else - RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x80); + rhine_wait_bit_high(rp, MIIRegAddr, 0x80); } /* Read and write over the MII Management Data I/O (MDIO) interface. */ @@ -1365,16 +1509,16 @@ static int mdio_read(struct net_device *dev, int phy_id, int regnum) void __iomem *ioaddr = rp->base; int result; - rhine_disable_linkmon(ioaddr, rp->quirks); + rhine_disable_linkmon(rp); /* rhine_disable_linkmon already cleared MIICmd */ iowrite8(phy_id, ioaddr + MIIPhyAddr); iowrite8(regnum, ioaddr + MIIRegAddr); iowrite8(0x40, ioaddr + MIICmd); /* Trigger read */ - RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x40)); + rhine_wait_bit_low(rp, MIICmd, 0x40); result = ioread16(ioaddr + MIIData); - rhine_enable_linkmon(ioaddr); + rhine_enable_linkmon(rp); return result; } @@ -1383,16 +1527,33 @@ static void mdio_write(struct net_device *dev, int phy_id, int regnum, int value struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; - rhine_disable_linkmon(ioaddr, rp->quirks); + rhine_disable_linkmon(rp); /* rhine_disable_linkmon already cleared MIICmd */ iowrite8(phy_id, ioaddr + MIIPhyAddr); iowrite8(regnum, ioaddr + MIIRegAddr); iowrite16(value, ioaddr + MIIData); iowrite8(0x20, ioaddr + MIICmd); /* Trigger write */ - RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x20)); + rhine_wait_bit_low(rp, MIICmd, 0x20); - rhine_enable_linkmon(ioaddr); + rhine_enable_linkmon(rp); +} + +static void rhine_task_disable(struct rhine_private *rp) +{ + mutex_lock(&rp->task_lock); + rp->task_enable = false; + mutex_unlock(&rp->task_lock); + + cancel_work_sync(&rp->slow_event_task); + cancel_work_sync(&rp->reset_task); +} + +static void rhine_task_enable(struct rhine_private *rp) +{ + mutex_lock(&rp->task_lock); + rp->task_enable = true; + mutex_unlock(&rp->task_lock); } static int rhine_open(struct net_device *dev) @@ -1406,8 +1567,7 @@ static int rhine_open(struct net_device *dev) if (rc) return rc; - if (debug > 1) - netdev_dbg(dev, "%s() irq %d\n", __func__, rp->pdev->irq); + netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->pdev->irq); rc = alloc_ring(dev); if (rc) { @@ -1417,11 +1577,12 @@ static int rhine_open(struct net_device *dev) alloc_rbufs(dev); alloc_tbufs(dev); rhine_chip_reset(dev); + rhine_task_enable(rp); init_registers(dev); - if (debug > 2) - netdev_dbg(dev, "%s() Done - status %04x MII status: %04x\n", - __func__, ioread16(ioaddr + ChipCmd), - mdio_read(dev, rp->mii_if.phy_id, MII_BMSR)); + + netif_dbg(rp, ifup, dev, "%s() Done - status %04x MII status: %04x\n", + __func__, ioread16(ioaddr + ChipCmd), + mdio_read(dev, rp->mii_if.phy_id, MII_BMSR)); netif_start_queue(dev); @@ -1434,11 +1595,12 @@ static void rhine_reset_task(struct work_struct *work) reset_task); struct net_device *dev = rp->dev; - /* protect against concurrent rx interrupts */ - disable_irq(rp->pdev->irq); + mutex_lock(&rp->task_lock); - napi_disable(&rp->napi); + if (!rp->task_enable) + goto out_unlock; + napi_disable(&rp->napi); spin_lock_bh(&rp->lock); /* clear all descriptors */ @@ -1452,11 +1614,13 @@ static void rhine_reset_task(struct work_struct *work) init_registers(dev); spin_unlock_bh(&rp->lock); - enable_irq(rp->pdev->irq); dev->trans_start = jiffies; /* prevent tx timeout */ dev->stats.tx_errors++; netif_wake_queue(dev); + +out_unlock: + mutex_unlock(&rp->task_lock); } static void rhine_tx_timeout(struct net_device *dev) @@ -1477,7 +1641,6 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; unsigned entry; - unsigned long flags; /* Caution: the write order is important here, set the field with the "ownership" bits last. */ @@ -1529,7 +1692,6 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, rp->tx_ring[entry].tx_status = 0; /* lock eth irq */ - spin_lock_irqsave(&rp->lock, flags); wmb(); rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn); wmb(); @@ -1550,78 +1712,43 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) netif_stop_queue(dev); - spin_unlock_irqrestore(&rp->lock, flags); + netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n", + rp->cur_tx - 1, entry); - if (debug > 4) { - netdev_dbg(dev, "Transmit frame #%d queued in slot %d\n", - rp->cur_tx-1, entry); - } return NETDEV_TX_OK; } +static void rhine_irq_disable(struct rhine_private *rp) +{ + iowrite16(0x0000, rp->base + IntrEnable); + mmiowb(); +} + /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */ static irqreturn_t rhine_interrupt(int irq, void *dev_instance) { struct net_device *dev = dev_instance; struct rhine_private *rp = netdev_priv(dev); - void __iomem *ioaddr = rp->base; - u32 intr_status; - int boguscnt = max_interrupt_work; + u32 status; int handled = 0; - while ((intr_status = get_intr_status(dev))) { - handled = 1; - - /* Acknowledge all of the current interrupt sources ASAP. */ - if (intr_status & IntrTxDescRace) - iowrite8(0x08, ioaddr + IntrStatus2); - iowrite16(intr_status & 0xffff, ioaddr + IntrStatus); - IOSYNC; + status = rhine_get_events(rp); - if (debug > 4) - netdev_dbg(dev, "Interrupt, status %08x\n", - intr_status); - - if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped | - IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf)) { - iowrite16(IntrTxAborted | - IntrTxDone | IntrTxError | IntrTxUnderrun | - IntrPCIErr | IntrStatsMax | IntrLinkChange, - ioaddr + IntrEnable); - - napi_schedule(&rp->napi); - } + netif_dbg(rp, intr, dev, "Interrupt, status %08x\n", status); - if (intr_status & (IntrTxErrSummary | IntrTxDone)) { - if (intr_status & IntrTxErrSummary) { - /* Avoid scavenging before Tx engine turned off */ - RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn)); - if (debug > 2 && - ioread8(ioaddr+ChipCmd) & CmdTxOn) - netdev_warn(dev, - "%s: Tx engine still on\n", - __func__); - } - rhine_tx(dev); - } + if (status & RHINE_EVENT) { + handled = 1; - /* Abnormal error summary/uncommon events handlers. */ - if (intr_status & (IntrPCIErr | IntrLinkChange | - IntrStatsMax | IntrTxError | IntrTxAborted | - IntrTxUnderrun | IntrTxDescRace)) - rhine_error(dev, intr_status); + rhine_irq_disable(rp); + napi_schedule(&rp->napi); + } - if (--boguscnt < 0) { - netdev_warn(dev, "Too much work at interrupt, status=%#08x\n", - intr_status); - break; - } + if (status & ~(IntrLinkChange | IntrStatsMax | RHINE_EVENT_NAPI)) { + netif_err(rp, intr, dev, "Something Wicked happened! %08x\n", + status); } - if (debug > 3) - netdev_dbg(dev, "exiting interrupt, status=%08x\n", - ioread16(ioaddr + IntrStatus)); return IRQ_RETVAL(handled); } @@ -1632,20 +1759,16 @@ static void rhine_tx(struct net_device *dev) struct rhine_private *rp = netdev_priv(dev); int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE; - spin_lock(&rp->lock); - /* find and cleanup dirty tx descriptors */ while (rp->dirty_tx != rp->cur_tx) { txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status); - if (debug > 6) - netdev_dbg(dev, "Tx scavenge %d status %08x\n", - entry, txstatus); + netif_dbg(rp, tx_done, dev, "Tx scavenge %d status %08x\n", + entry, txstatus); if (txstatus & DescOwn) break; if (txstatus & 0x8000) { - if (debug > 1) - netdev_dbg(dev, "Transmit error, Tx status %08x\n", - txstatus); + netif_dbg(rp, tx_done, dev, + "Transmit error, Tx status %08x\n", txstatus); dev->stats.tx_errors++; if (txstatus & 0x0400) dev->stats.tx_carrier_errors++; @@ -1667,10 +1790,8 @@ static void rhine_tx(struct net_device *dev) dev->stats.collisions += (txstatus >> 3) & 0x0F; else dev->stats.collisions += txstatus & 0x0F; - if (debug > 6) - netdev_dbg(dev, "collisions: %1.1x:%1.1x\n", - (txstatus >> 3) & 0xF, - txstatus & 0xF); + netif_dbg(rp, tx_done, dev, "collisions: %1.1x:%1.1x\n", + (txstatus >> 3) & 0xF, txstatus & 0xF); dev->stats.tx_bytes += rp->tx_skbuff[entry]->len; dev->stats.tx_packets++; } @@ -1687,8 +1808,6 @@ static void rhine_tx(struct net_device *dev) } if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) netif_wake_queue(dev); - - spin_unlock(&rp->lock); } /** @@ -1713,11 +1832,8 @@ static int rhine_rx(struct net_device *dev, int limit) int count; int entry = rp->cur_rx % RX_RING_SIZE; - if (debug > 4) { - netdev_dbg(dev, "%s(), entry %d status %08x\n", - __func__, entry, - le32_to_cpu(rp->rx_head_desc->rx_status)); - } + netif_dbg(rp, rx_status, dev, "%s(), entry %d status %08x\n", __func__, + entry, le32_to_cpu(rp->rx_head_desc->rx_status)); /* If EOP is set on the next entry, it's a new packet. Send it up. */ for (count = 0; count < limit; ++count) { @@ -1729,9 +1845,8 @@ static int rhine_rx(struct net_device *dev, int limit) if (desc_status & DescOwn) break; - if (debug > 4) - netdev_dbg(dev, "%s() status is %08x\n", - __func__, desc_status); + netif_dbg(rp, rx_status, dev, "%s() status %08x\n", __func__, + desc_status); if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) { if ((desc_status & RxWholePkt) != RxWholePkt) { @@ -1747,9 +1862,9 @@ static int rhine_rx(struct net_device *dev, int limit) dev->stats.rx_length_errors++; } else if (desc_status & RxErr) { /* There was a error. */ - if (debug > 2) - netdev_dbg(dev, "%s() Rx error was %08x\n", - __func__, desc_status); + netif_dbg(rp, rx_err, dev, + "%s() Rx error %08x\n", __func__, + desc_status); dev->stats.rx_errors++; if (desc_status & 0x0030) dev->stats.rx_length_errors++; @@ -1839,19 +1954,6 @@ static int rhine_rx(struct net_device *dev, int limit) return count; } -/* - * Clears the "tally counters" for CRC errors and missed frames(?). - * It has been reported that some chips need a write of 0 to clear - * these, for others the counters are set to 1 when written to and - * instead cleared when read. So we clear them both ways ... - */ -static inline void clear_tally_counters(void __iomem *ioaddr) -{ - iowrite32(0, ioaddr + RxMissed); - ioread16(ioaddr + RxCRCErrs); - ioread16(ioaddr + RxMissed); -} - static void rhine_restart_tx(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; @@ -1862,7 +1964,7 @@ static void rhine_restart_tx(struct net_device *dev) { * If new errors occurred, we need to sort them out before doing Tx. * In that case the ISR will be back here RSN anyway. */ - intr_status = get_intr_status(dev); + intr_status = rhine_get_events(rp); if ((intr_status & IntrTxErrSummary) == 0) { @@ -1883,79 +1985,50 @@ static void rhine_restart_tx(struct net_device *dev) { } else { /* This should never happen */ - if (debug > 1) - netdev_warn(dev, "%s() Another error occurred %08x\n", - __func__, intr_status); + netif_warn(rp, tx_err, dev, "another error occurred %08x\n", + intr_status); } } -static void rhine_error(struct net_device *dev, int intr_status) +static void rhine_slow_event_task(struct work_struct *work) { - struct rhine_private *rp = netdev_priv(dev); - void __iomem *ioaddr = rp->base; + struct rhine_private *rp = + container_of(work, struct rhine_private, slow_event_task); + struct net_device *dev = rp->dev; + u32 intr_status; - spin_lock(&rp->lock); + mutex_lock(&rp->task_lock); + + if (!rp->task_enable) + goto out_unlock; + + intr_status = rhine_get_events(rp); + rhine_ack_events(rp, intr_status & RHINE_EVENT_SLOW); if (intr_status & IntrLinkChange) rhine_check_media(dev, 0); - if (intr_status & IntrStatsMax) { - dev->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs); - dev->stats.rx_missed_errors += ioread16(ioaddr + RxMissed); - clear_tally_counters(ioaddr); - } - if (intr_status & IntrTxAborted) { - if (debug > 1) - netdev_info(dev, "Abort %08x, frame dropped\n", - intr_status); - } - if (intr_status & IntrTxUnderrun) { - if (rp->tx_thresh < 0xE0) - BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr + TxConfig); - if (debug > 1) - netdev_info(dev, "Transmitter underrun, Tx threshold now %02x\n", - rp->tx_thresh); - } - if (intr_status & IntrTxDescRace) { - if (debug > 2) - netdev_info(dev, "Tx descriptor write-back race\n"); - } - if ((intr_status & IntrTxError) && - (intr_status & (IntrTxAborted | - IntrTxUnderrun | IntrTxDescRace)) == 0) { - if (rp->tx_thresh < 0xE0) { - BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr + TxConfig); - } - if (debug > 1) - netdev_info(dev, "Unspecified error. Tx threshold now %02x\n", - rp->tx_thresh); - } - if (intr_status & (IntrTxAborted | IntrTxUnderrun | IntrTxDescRace | - IntrTxError)) - rhine_restart_tx(dev); - - if (intr_status & ~(IntrLinkChange | IntrStatsMax | IntrTxUnderrun | - IntrTxError | IntrTxAborted | IntrNormalSummary | - IntrTxDescRace)) { - if (debug > 1) - netdev_err(dev, "Something Wicked happened! %08x\n", - intr_status); - } - spin_unlock(&rp->lock); + if (intr_status & IntrPCIErr) + netif_warn(rp, hw, dev, "PCI error\n"); + + napi_disable(&rp->napi); + rhine_irq_disable(rp); + /* Slow and safe. Consider __napi_schedule as a replacement ? */ + napi_enable(&rp->napi); + napi_schedule(&rp->napi); + +out_unlock: + mutex_unlock(&rp->task_lock); } static struct net_device_stats *rhine_get_stats(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); - void __iomem *ioaddr = rp->base; - unsigned long flags; - spin_lock_irqsave(&rp->lock, flags); - dev->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs); - dev->stats.rx_missed_errors += ioread16(ioaddr + RxMissed); - clear_tally_counters(ioaddr); - spin_unlock_irqrestore(&rp->lock, flags); + spin_lock_bh(&rp->lock); + rhine_update_rx_crc_and_missed_errord(rp); + spin_unlock_bh(&rp->lock); return &dev->stats; } @@ -2022,9 +2095,9 @@ static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct rhine_private *rp = netdev_priv(dev); int rc; - spin_lock_irq(&rp->lock); + mutex_lock(&rp->task_lock); rc = mii_ethtool_gset(&rp->mii_if, cmd); - spin_unlock_irq(&rp->lock); + mutex_unlock(&rp->task_lock); return rc; } @@ -2034,10 +2107,10 @@ static int netdev_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct rhine_private *rp = netdev_priv(dev); int rc; - spin_lock_irq(&rp->lock); + mutex_lock(&rp->task_lock); rc = mii_ethtool_sset(&rp->mii_if, cmd); - spin_unlock_irq(&rp->lock); rhine_set_carrier(&rp->mii_if); + mutex_unlock(&rp->task_lock); return rc; } @@ -2058,12 +2131,16 @@ static u32 netdev_get_link(struct net_device *dev) static u32 netdev_get_msglevel(struct net_device *dev) { - return debug; + struct rhine_private *rp = netdev_priv(dev); + + return rp->msg_enable; } static void netdev_set_msglevel(struct net_device *dev, u32 value) { - debug = value; + struct rhine_private *rp = netdev_priv(dev); + + rp->msg_enable = value; } static void rhine_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -2119,10 +2196,10 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!netif_running(dev)) return -EINVAL; - spin_lock_irq(&rp->lock); + mutex_lock(&rp->task_lock); rc = generic_mii_ioctl(&rp->mii_if, if_mii(rq), cmd, NULL); - spin_unlock_irq(&rp->lock); rhine_set_carrier(&rp->mii_if); + mutex_unlock(&rp->task_lock); return rc; } @@ -2132,27 +2209,21 @@ static int rhine_close(struct net_device *dev) struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; + rhine_task_disable(rp); napi_disable(&rp->napi); - cancel_work_sync(&rp->reset_task); netif_stop_queue(dev); - spin_lock_irq(&rp->lock); - - if (debug > 1) - netdev_dbg(dev, "Shutting down ethercard, status was %04x\n", - ioread16(ioaddr + ChipCmd)); + netif_dbg(rp, ifdown, dev, "Shutting down ethercard, status was %04x\n", + ioread16(ioaddr + ChipCmd)); /* Switch to loopback mode to avoid hardware races. */ iowrite8(rp->tx_thresh | 0x02, ioaddr + TxConfig); - /* Disable interrupts by clearing the interrupt mask. */ - iowrite16(0x0000, ioaddr + IntrEnable); + rhine_irq_disable(rp); /* Stop the chip's Tx and Rx processes. */ iowrite16(CmdStop, ioaddr + ChipCmd); - spin_unlock_irq(&rp->lock); - free_irq(rp->pdev->irq, dev); free_rbufs(dev); free_tbufs(dev); @@ -2192,6 +2263,8 @@ static void rhine_shutdown (struct pci_dev *pdev) if (rp->quirks & rq6patterns) iowrite8(0x04, ioaddr + WOLcgClr); + spin_lock(&rp->lock); + if (rp->wolopts & WAKE_MAGIC) { iowrite8(WOLmagic, ioaddr + WOLcrSet); /* @@ -2216,58 +2289,46 @@ static void rhine_shutdown (struct pci_dev *pdev) iowrite8(ioread8(ioaddr + StickyHW) | 0x04, ioaddr + StickyHW); } - /* Hit power state D3 (sleep) */ - if (!avoid_D3) - iowrite8(ioread8(ioaddr + StickyHW) | 0x03, ioaddr + StickyHW); + spin_unlock(&rp->lock); - /* TODO: Check use of pci_enable_wake() */ + if (system_state == SYSTEM_POWER_OFF && !avoid_D3) { + iowrite8(ioread8(ioaddr + StickyHW) | 0x03, ioaddr + StickyHW); + pci_wake_from_d3(pdev, true); + pci_set_power_state(pdev, PCI_D3hot); + } } -#ifdef CONFIG_PM -static int rhine_suspend(struct pci_dev *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int rhine_suspend(struct device *device) { + struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); - unsigned long flags; if (!netif_running(dev)) return 0; + rhine_task_disable(rp); + rhine_irq_disable(rp); napi_disable(&rp->napi); netif_device_detach(dev); - pci_save_state(pdev); - spin_lock_irqsave(&rp->lock, flags); rhine_shutdown(pdev); - spin_unlock_irqrestore(&rp->lock, flags); - free_irq(dev->irq, dev); return 0; } -static int rhine_resume(struct pci_dev *pdev) +static int rhine_resume(struct device *device) { + struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); - unsigned long flags; - int ret; if (!netif_running(dev)) return 0; - if (request_irq(dev->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev)) - netdev_err(dev, "request_irq failed\n"); - - ret = pci_set_power_state(pdev, PCI_D0); - if (debug > 1) - netdev_info(dev, "Entering power state D0 %s (%d)\n", - ret ? "failed" : "succeeded", ret); - - pci_restore_state(pdev); - - spin_lock_irqsave(&rp->lock, flags); #ifdef USE_MMIO enable_mmio(rp->pioaddr, rp->quirks); #endif @@ -2276,25 +2337,32 @@ static int rhine_resume(struct pci_dev *pdev) free_rbufs(dev); alloc_tbufs(dev); alloc_rbufs(dev); + rhine_task_enable(rp); + spin_lock_bh(&rp->lock); init_registers(dev); - spin_unlock_irqrestore(&rp->lock, flags); + spin_unlock_bh(&rp->lock); netif_device_attach(dev); return 0; } -#endif /* CONFIG_PM */ + +static SIMPLE_DEV_PM_OPS(rhine_pm_ops, rhine_suspend, rhine_resume); +#define RHINE_PM_OPS (&rhine_pm_ops) + +#else + +#define RHINE_PM_OPS NULL + +#endif /* !CONFIG_PM_SLEEP */ static struct pci_driver rhine_driver = { .name = DRV_NAME, .id_table = rhine_pci_tbl, .probe = rhine_init_one, .remove = __devexit_p(rhine_remove_one), -#ifdef CONFIG_PM - .suspend = rhine_suspend, - .resume = rhine_resume, -#endif /* CONFIG_PM */ - .shutdown = rhine_shutdown, + .shutdown = rhine_shutdown, + .driver.pm = RHINE_PM_OPS, }; static struct dmi_system_id __initdata rhine_dmi_table[] = { diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index f45c85a8426..72a854f05bb 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -529,7 +529,7 @@ static int ixp4xx_mdio_register(void) mdio_bus->name = "IXP4xx MII Bus"; mdio_bus->read = &ixp4xx_mdio_read; mdio_bus->write = &ixp4xx_mdio_write; - strcpy(mdio_bus->id, "0"); + snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "ixp4xx-eth-0"); if ((err = mdiobus_register(mdio_bus))) mdiobus_free(mdio_bus); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 9663e0ba600..ba3c59147aa 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1159,7 +1159,7 @@ static void rx_timestamp_work(struct work_struct *work) } } spin_unlock_irqrestore(&dp83640->rx_lock, flags); - netif_rx(skb); + netif_rx_ni(skb); } /* Clear out expired time stamps. */ diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c index 1fa4d73c3cc..633680d0828 100644 --- a/drivers/net/phy/fixed.c +++ b/drivers/net/phy/fixed.c @@ -220,7 +220,7 @@ static int __init fixed_mdio_bus_init(void) goto err_mdiobus_reg; } - snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "0"); + snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "fixed-0"); fmb->mii_bus->name = "Fixed MDIO Bus"; fmb->mii_bus->priv = fmb; fmb->mii_bus->parent = &pdev->dev; diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 89c5a3eccc1..50e8e5e7446 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -116,7 +116,7 @@ static struct mii_bus * __devinit mdio_gpio_bus_init(struct device *dev, if (!new_bus->irq[i]) new_bus->irq[i] = PHY_POLL; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", bus_id); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id); if (gpio_request(bitbang->mdc, "mdc")) goto out_free_bus; diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c index bd12ba941be..826d961f39f 100644 --- a/drivers/net/phy/mdio-octeon.c +++ b/drivers/net/phy/mdio-octeon.c @@ -118,7 +118,8 @@ static int __devinit octeon_mdiobus_probe(struct platform_device *pdev) bus->mii_bus->priv = bus; bus->mii_bus->irq = bus->phy_irq; bus->mii_bus->name = "mdio-octeon"; - snprintf(bus->mii_bus->id, MII_BUS_ID_SIZE, "%x", bus->unit); + snprintf(bus->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + bus->mii_bus->name, bus->unit); bus->mii_bus->parent = &pdev->dev; bus->mii_bus->read = octeon_mdiobus_read; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 6c58da2b882..88cc5db9aff 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -37,22 +37,36 @@ #include <asm/uaccess.h> /** - * mdiobus_alloc - allocate a mii_bus structure + * mdiobus_alloc_size - allocate a mii_bus structure * * Description: called by a bus driver to allocate an mii_bus * structure to fill in. + * + * 'size' is an an extra amount of memory to allocate for private storage. + * If non-zero, then bus->priv is points to that memory. */ -struct mii_bus *mdiobus_alloc(void) +struct mii_bus *mdiobus_alloc_size(size_t size) { struct mii_bus *bus; + size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN); + size_t alloc_size; + + /* If we alloc extra space, it should be aligned */ + if (size) + alloc_size = aligned_size + size; + else + alloc_size = sizeof(*bus); - bus = kzalloc(sizeof(*bus), GFP_KERNEL); - if (bus != NULL) + bus = kzalloc(alloc_size, GFP_KERNEL); + if (bus) { bus->state = MDIOBUS_ALLOCATED; + if (size) + bus->priv = (void *)bus + aligned_size; + } return bus; } -EXPORT_SYMBOL(mdiobus_alloc); +EXPORT_SYMBOL(mdiobus_alloc_size); /** * mdiobus_release - mii_bus device release callback diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index c1c9293c2bb..df884dde2a5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -585,8 +585,8 @@ static int pptp_create(struct net *net, struct socket *sock) po = pppox_sk(sk); opt = &po->proto.pptp; - opt->seq_sent = 0; opt->seq_recv = 0; - opt->ack_recv = 0; opt->ack_sent = 0; + opt->seq_sent = 0; opt->seq_recv = 0xffffffff; + opt->ack_recv = 0; opt->ack_sent = 0xffffffff; error = 0; out: diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index d0937c4634c..8e84f5bdd6c 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -978,6 +978,7 @@ static int ax88772_link_reset(struct usbnet *dev) static int ax88772_reset(struct usbnet *dev) { + struct asix_data *data = (struct asix_data *)&dev->data; int ret, embd_phy; u16 rx_ctl; @@ -1055,6 +1056,13 @@ static int ax88772_reset(struct usbnet *dev) goto out; } + /* Rewrite MAC address */ + memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, + data->mac_addr); + if (ret < 0) + goto out; + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); if (ret < 0) @@ -1320,6 +1328,13 @@ static int ax88178_reset(struct usbnet *dev) if (ret < 0) return ret; + /* Rewrite MAC address */ + memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, + data->mac_addr); + if (ret < 0) + return ret; + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); if (ret < 0) return ret; diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 2589b38b689..2b0bfb8cca0 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -46,7 +46,7 @@ static const int m2ThreshExt_off = 127; * @chan: * * This is the function to change channel on single-chip devices, that is - * all devices after ar9280. + * for AR9300 family of chipsets. * * This function takes the channel value in MHz and sets * hardware channel value. Assumes writes have been enabled to analog bus. diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index b30e9fc6433..171ccf7c972 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -679,7 +679,6 @@ void ath9k_deinit_device(struct ath_softc *sc); void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw); void ath9k_reload_chainmask_settings(struct ath_softc *sc); -void ath_radio_disable(struct ath_softc *sc, struct ieee80211_hw *hw); bool ath9k_uses_beacons(int type); #ifdef CONFIG_ATH9K_PCI diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 172e33db7f4..2f4b48e6fb0 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -400,6 +400,7 @@ bool ath9k_hw_getnf(struct ath_hw *ah, struct ath9k_channel *chan) ah->noise = ath9k_hw_getchan_noise(ah, chan); return true; } +EXPORT_SYMBOL(ath9k_hw_getnf); void ath9k_init_nfcal_hist_buffer(struct ath_hw *ah, struct ath9k_channel *chan) diff --git a/drivers/net/wireless/ath/ath9k/calib.h b/drivers/net/wireless/ath/ath9k/calib.h index 05b9dbf8185..3b33996d97d 100644 --- a/drivers/net/wireless/ath/ath9k/calib.h +++ b/drivers/net/wireless/ath/ath9k/calib.h @@ -19,7 +19,6 @@ #include "hw.h" -#define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT 3 #define AR_PHY_CCA_FILTERWINDOW_LENGTH 5 #define NUM_NF_READINGS 6 diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index e267c92dbfb..4a00806e285 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1629,7 +1629,6 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { struct ieee80211_channel *curchan = hw->conf.channel; - struct ath9k_channel old_chan; int pos = curchan->hw_value; int old_pos = -1; unsigned long flags; @@ -1654,11 +1653,8 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) * Preserve the current channel values, before updating * the same channel */ - if (old_pos == pos) { - memcpy(&old_chan, &sc->sc_ah->channels[pos], - sizeof(struct ath9k_channel)); - ah->curchan = &old_chan; - } + if (ah->curchan && (old_pos == pos)) + ath9k_hw_getnf(ah, ah->curchan); ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos], curchan, conf->channel_type); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index 5a002a21f10..f7eeee1dcdb 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -3119,8 +3119,10 @@ static int brcmf_sdbrcm_write_vars(struct brcmf_sdio *bus) /* Verify NVRAM bytes */ brcmf_dbg(INFO, "Compare NVRAM dl & ul; varsize=%d\n", varsize); nvram_ularray = kmalloc(varsize, GFP_ATOMIC); - if (!nvram_ularray) + if (!nvram_ularray) { + kfree(vbuffer); return -ENOMEM; + } /* Upload image to verify downloaded contents. */ memset(nvram_ularray, 0xaa, varsize); diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/fw.c b/drivers/net/wireless/rtlwifi/rtl8192se/fw.c index 6f91a148c22..3fda6b1dcf4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/fw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/fw.c @@ -196,6 +196,8 @@ static bool _rtl92s_firmware_downloadcode(struct ieee80211_hw *hw, /* Allocate skb buffer to contain firmware */ /* info and tx descriptor info. */ skb = dev_alloc_skb(frag_length); + if (!skb) + return false; skb_reserve(skb, extra_descoffset); seg_ptr = (u8 *)skb_put(skb, (u32)(frag_length - extra_descoffset)); @@ -573,6 +575,8 @@ static bool _rtl92s_firmware_set_h2c_cmd(struct ieee80211_hw *hw, u8 h2c_cmd, len = _rtl92s_get_h2c_cmdlen(MAX_TRANSMIT_BUFFER_SIZE, 1, &cmd_len); skb = dev_alloc_skb(len); + if (!skb) + return false; cb_desc = (struct rtl_tcb_desc *)(skb->cb); cb_desc->queue_index = TXCMD_QUEUE; cb_desc->cmd_or_init = DESC_PACKET_TYPE_NORMAL; diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index d0b597b5039..0cb64f50cec 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -3404,8 +3404,8 @@ static int __init parport_init_mode_setup(char *str) #endif #ifdef MODULE -static const char *irq[PARPORT_PC_MAX_PORTS]; -static const char *dma[PARPORT_PC_MAX_PORTS]; +static char *irq[PARPORT_PC_MAX_PORTS]; +static char *dma[PARPORT_PC_MAX_PORTS]; MODULE_PARM_DESC(io, "Base I/O address (SPP regs)"); module_param_array(io, int, NULL, 0); diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index e91b8ddc279..c9b92531ae6 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -16,8 +16,8 @@ #include <linux/module.h> #include <linux/err.h> #include <linux/platform_device.h> -#include <linux/mfd/ab8500.h> #include <linux/mfd/abx500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> #include <linux/regulator/ab8500.h> diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index a0a9810adf0..4bcf9ca2818 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -15,7 +15,7 @@ #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #include <linux/delay.h> #define AB8500_RTC_SOFF_STAT_REG 0x00 diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 4a5529346b4..2d71943bc43 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -261,6 +261,8 @@ static int __devinit max8925_rtc_probe(struct platform_device *pdev) /* XXX - isn't this redundant? */ platform_set_drvdata(pdev, info); + device_init_wakeup(&pdev->dev, 1); + info->rtc_dev = rtc_device_register("max8925-rtc", &pdev->dev, &max8925_rtc_ops, THIS_MODULE); ret = PTR_ERR(info->rtc_dev); @@ -290,10 +292,34 @@ static int __devexit max8925_rtc_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int max8925_rtc_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent); + + if (device_may_wakeup(dev)) + chip->wakeup_flag |= 1 << MAX8925_IRQ_RTC_ALARM0; + return 0; +} +static int max8925_rtc_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent); + + if (device_may_wakeup(dev)) + chip->wakeup_flag &= ~(1 << MAX8925_IRQ_RTC_ALARM0); + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(max8925_rtc_pm_ops, max8925_rtc_suspend, max8925_rtc_resume); + static struct platform_driver max8925_rtc_driver = { .driver = { .name = "max8925-rtc", .owner = THIS_MODULE, + .pm = &max8925_rtc_pm_ops, }, .probe = max8925_rtc_probe, .remove = __devexit_p(max8925_rtc_remove), diff --git a/drivers/usb/otg/ab8500-usb.c b/drivers/usb/otg/ab8500-usb.c index 07ccea9ada4..74fe6e62e0f 100644 --- a/drivers/usb/otg/ab8500-usb.c +++ b/drivers/usb/otg/ab8500-usb.c @@ -30,7 +30,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/mfd/abx500.h> -#include <linux/mfd/ab8500.h> +#include <linux/mfd/abx500/ab8500.h> #define AB8500_MAIN_WD_CTRL_REG 0x01 #define AB8500_USB_LINE_STAT_REG 0x80 diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index 081dc474527..fe13ac567d5 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -81,7 +81,7 @@ static int vram __devinitdata = 0; static int bpp __devinitdata = 8; static int reverse_i2c __devinitdata; #ifdef CONFIG_MTRR -static int nomtrr __devinitdata = 0; +static bool nomtrr __devinitdata = false; #endif #ifdef CONFIG_PMAC_BACKLIGHT static int backlight __devinitdata = 1; @@ -1509,7 +1509,7 @@ static int __devinit nvidiafb_setup(char *options) backlight = simple_strtoul(this_opt+10, NULL, 0); #ifdef CONFIG_MTRR } else if (!strncmp(this_opt, "nomtrr", 6)) { - nomtrr = 1; + nomtrr = true; #endif } else if (!strncmp(this_opt, "fpdither:", 9)) { fpdither = simple_strtol(this_opt+9, NULL, 0); @@ -1599,7 +1599,7 @@ MODULE_PARM_DESC(bpp, "pixel width in bits" module_param(reverse_i2c, int, 0); MODULE_PARM_DESC(reverse_i2c, "reverse port assignment of the i2c bus"); #ifdef CONFIG_MTRR -module_param(nomtrr, bool, 0); +module_param(nomtrr, bool, false); MODULE_PARM_DESC(nomtrr, "Disables MTRR support (0 or 1=disabled) " "(default=0)"); #endif @@ -476,14 +476,21 @@ static void kiocb_batch_init(struct kiocb_batch *batch, long total) batch->count = total; } -static void kiocb_batch_free(struct kiocb_batch *batch) +static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) { struct kiocb *req, *n; + if (list_empty(&batch->head)) + return; + + spin_lock_irq(&ctx->ctx_lock); list_for_each_entry_safe(req, n, &batch->head, ki_batch) { list_del(&req->ki_batch); + list_del(&req->ki_list); kmem_cache_free(kiocb_cachep, req); + ctx->reqs_active--; } + spin_unlock_irq(&ctx->ctx_lock); } /* @@ -1742,7 +1749,7 @@ long do_io_submit(aio_context_t ctx_id, long nr, } blk_finish_plug(&plug); - kiocb_batch_free(&batch); + kiocb_batch_free(ctx, &batch); put_ioctx(ctx); return i ? i : ret; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 9ef5b291440..da8876d38a7 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -76,7 +76,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, data += wr; bytes -= wr; } - mutex_lock(&sbi->pipe_mutex); + mutex_unlock(&sbi->pipe_mutex); set_fs(fs); diff --git a/fs/block_dev.c b/fs/block_dev.c index afe74dda632..0e575d1304b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1139,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; + bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; @@ -1159,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; + bdev->bd_queue = NULL; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); put_disk(disk); @@ -1232,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; + bdev->bd_queue = NULL; bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f99a099a774..d8525662ca7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, #ifdef CONFIG_MIGRATION static int btree_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { /* * we can't safely write a btree page from here, @@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping, if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 74fd74719dc..618246bc219 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -973,7 +973,7 @@ static int dentry_lease_is_valid(struct dentry *dentry) spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); - if (di && di->lease_session) { + if (di->lease_session) { s = di->lease_session; spin_lock(&s->s_cap_lock); gen = s->s_cap_gen; @@ -1072,13 +1072,11 @@ static void ceph_d_release(struct dentry *dentry) struct ceph_dentry_info *di = ceph_dentry(dentry); dout("d_release %p\n", dentry); - if (di) { - ceph_dentry_lru_del(dentry); - if (di->lease_session) - ceph_put_mds_session(di->lease_session); - kmem_cache_free(ceph_dentry_cachep, di); - dentry->d_fsdata = NULL; - } + ceph_dentry_lru_del(dentry); + if (di->lease_session) + ceph_put_mds_session(di->lease_session); + kmem_cache_free(ceph_dentry_cachep, di); + dentry->d_fsdata = NULL; } static int ceph_snapdir_d_revalidate(struct dentry *dentry, @@ -1096,17 +1094,36 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry, */ void ceph_dir_set_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry) && + ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { + dout(" marking %p (%p) complete\n", inode, dentry); + set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); } void ceph_dir_clear_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry)) { + dout(" marking %p (%p) complete\n", inode, dentry); + set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); } bool ceph_dir_test_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry)) { + dout(" marking %p (%p) NOT complete\n", inode, dentry); + clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); return false; } @@ -1220,6 +1237,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, do { ceph_mdsc_get_request(req); spin_unlock(&ci->i_unsafe_lock); + dout("dir_fsync %p wait on tid %llu (until %llu)\n", inode, req->r_tid, last_tid); if (req->r_timeout) { @@ -1232,9 +1250,9 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, } else { wait_for_completion(&req->r_safe_completion); } - spin_lock(&ci->i_unsafe_lock); ceph_mdsc_put_request(req); + spin_lock(&ci->i_unsafe_lock); if (ret || list_empty(head)) break; req = list_entry(head->next, @@ -1259,13 +1277,11 @@ void ceph_dentry_lru_add(struct dentry *dn) dout("dentry_lru_add %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_add_tail(&di->lru, &mdsc->dentry_lru); - mdsc->num_dentry++; - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_add_tail(&di->lru, &mdsc->dentry_lru); + mdsc->num_dentry++; + spin_unlock(&mdsc->dentry_lru_lock); } void ceph_dentry_lru_touch(struct dentry *dn) @@ -1275,12 +1291,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, dn->d_name.len, dn->d_name.name, di->offset); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_move_tail(&di->lru, &mdsc->dentry_lru); - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_move_tail(&di->lru, &mdsc->dentry_lru); + spin_unlock(&mdsc->dentry_lru_lock); } void ceph_dentry_lru_del(struct dentry *dn) @@ -1290,13 +1304,11 @@ void ceph_dentry_lru_del(struct dentry *dn) dout("dentry_lru_del %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_del_init(&di->lru); - mdsc->num_dentry--; - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_del_init(&di->lru); + mdsc->num_dentry--; + spin_unlock(&mdsc->dentry_lru_lock); } /* diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9fbcdecaacc..fbb2a643ef1 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -56,9 +56,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, return -EINVAL; spin_lock(&dentry->d_lock); - parent = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - + parent = dentry->d_parent; if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); @@ -81,7 +79,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } - dput(parent); + spin_unlock(&dentry->d_lock); return type; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 25283e7a37f..2c489378b4c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -850,11 +850,12 @@ static void ceph_set_dentry_offset(struct dentry *dn) { struct dentry *dir = dn->d_parent; struct inode *inode = dir->d_inode; - struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_inode_info *ci; struct ceph_dentry_info *di; BUG_ON(!inode); + ci = ceph_inode(inode); di = ceph_dentry(dn); spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6203d805eb4..23ab6a3f182 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2772,7 +2772,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, di = ceph_dentry(dentry); switch (h->action) { case CEPH_MDS_LEASE_REVOKE: - if (di && di->lease_session == session) { + if (di->lease_session == session) { if (ceph_seq_cmp(di->lease_seq, seq) > 0) h->seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); @@ -2781,7 +2781,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, break; case CEPH_MDS_LEASE_RENEW: - if (di && di->lease_session == session && + if (di->lease_session == session && di->lease_gen == session->s_cap_gen && di->lease_renew_from && di->lease_renew_after == 0) { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 48f61a12af6..00de2c9568c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -131,6 +131,8 @@ enum { Opt_rbytes, Opt_norbytes, Opt_noasyncreaddir, + Opt_dcache, + Opt_nodcache, Opt_ino32, }; @@ -152,6 +154,8 @@ static match_table_t fsopt_tokens = { {Opt_rbytes, "rbytes"}, {Opt_norbytes, "norbytes"}, {Opt_noasyncreaddir, "noasyncreaddir"}, + {Opt_dcache, "dcache"}, + {Opt_nodcache, "nodcache"}, {Opt_ino32, "ino32"}, {-1, NULL} }; @@ -231,6 +235,12 @@ static int parse_fsopt_token(char *c, void *private) case Opt_noasyncreaddir: fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; break; + case Opt_dcache: + fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; + break; + case Opt_nodcache: + fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; + break; case Opt_ino32: fsopt->flags |= CEPH_MOUNT_OPT_INO32; break; @@ -377,6 +387,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",norbytes"); if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) seq_puts(m, ",noasyncreaddir"); + if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) + seq_puts(m, ",dcache"); + else + seq_puts(m, ",nodcache"); if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); @@ -647,10 +661,10 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, root = ERR_PTR(-ENOMEM); goto out; } - ceph_init_dentry(root); } else { root = d_obtain_alias(inode); } + ceph_init_dentry(root); dout("open_root_inode success, root dentry is %p\n", root); } else { root = ERR_PTR(err); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cb3652b3727..1421f3d875a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -28,6 +28,7 @@ #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ +#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a5e36e4488a..857214ae8c0 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -818,6 +818,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); int issued; int err; + int required_blob_size; int dirty; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -833,14 +834,34 @@ int ceph_removexattr(struct dentry *dentry, const char *name) return -EOPNOTSUPP; } + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); __build_xattrs(inode); +retry: issued = __ceph_caps_issued(ci, NULL); dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); if (!(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + required_blob_size = __get_required_blob_size(ci, 0, 0); + + if (!ci->i_xattrs.prealloc_blob || + required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { + struct ceph_buffer *blob; + + spin_unlock(&ci->i_ceph_lock); + dout(" preaallocating new blob size=%d\n", required_blob_size); + blob = ceph_buffer_new(required_blob_size, GFP_NOFS); + if (!blob) + goto out; + spin_lock(&ci->i_ceph_lock); + if (ci->i_xattrs.prealloc_blob) + ceph_buffer_put(ci->i_xattrs.prealloc_blob); + ci->i_xattrs.prealloc_blob = blob; + goto retry; + } + err = __remove_xattr_by_name(ceph_inode(inode), name); dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); ci->i_xattrs.dirty = true; @@ -853,6 +874,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) do_sync: spin_unlock(&ci->i_ceph_lock); err = ceph_send_removexattr(dentry, name); +out: return err; } diff --git a/fs/dcache.c b/fs/dcache.c index 616fedff011..16a53cc2cc0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1475,7 +1475,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -static struct dentry * d_find_any_alias(struct inode *inode) +/** + * d_find_any_alias - find any alias for a given inode + * @inode: inode to find an alias for + * + * If any aliases exist for the given inode, take and return a + * reference for one of them. If no aliases exist, return %NULL. + */ +struct dentry *d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1484,7 +1491,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } - +EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/fs/direct-io.c b/fs/direct-io.c index d740ab67ff6..4a588dbd11b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -36,6 +36,7 @@ #include <linux/rwsem.h> #include <linux/uio.h> #include <linux/atomic.h> +#include <linux/prefetch.h> /* * How many user pages to map in one call to get_user_pages(). This determines @@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, { int ret; sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ + sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ unsigned long fs_count; /* Number of filesystem-sized blocks */ - unsigned long dio_count;/* Number of dio_block-sized blocks */ - unsigned long blkmask; int create; /* @@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, if (ret == 0) { BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); fs_startblk = sdio->block_in_file >> sdio->blkfactor; - dio_count = sdio->final_block_in_request - sdio->block_in_file; - fs_count = dio_count >> sdio->blkfactor; - blkmask = (1 << sdio->blkfactor) - 1; - if (dio_count & blkmask) - fs_count++; + fs_endblk = (sdio->final_block_in_request - 1) >> + sdio->blkfactor; + fs_count = fs_endblk - fs_startblk + 1; map_bh->b_state = 0; map_bh->b_size = fs_count << dio->inode->i_blkbits; @@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio) * individual fields and will generate much worse code. This is important * for the whole file. */ -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +static inline ssize_t +do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) @@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, size_t size; unsigned long addr; unsigned blkbits = inode->i_blkbits; - unsigned bdev_blkbits = 0; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; loff_t end = offset; @@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (rw & WRITE) rw = WRITE_ODIRECT; - if (bdev) - bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + /* + * Avoid references to bdev if not absolutely needed to give + * the early prefetch in the caller enough time. + */ if (offset & blocksize_mask) { if (bdev) - blkbits = bdev_blkbits; + blkbits = blksize_bits(bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; if (offset & blocksize_mask) goto out; @@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, addr = (unsigned long)iov[seg].iov_base; size = iov[seg].iov_len; end += size; - if ((addr & blocksize_mask) || (size & blocksize_mask)) { + if (unlikely((addr & blocksize_mask) || + (size & blocksize_mask))) { if (bdev) - blkbits = bdev_blkbits; + blkbits = blksize_bits( + bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; - if ((addr & blocksize_mask) || (size & blocksize_mask)) + if ((addr & blocksize_mask) || (size & blocksize_mask)) goto out; } } @@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, out: return retval; } + +ssize_t +__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) +{ + /* + * The block device state is needed in the end to finally + * submit everything. Since it's likely to be cache cold + * prefetch it here as first thing to hide some of the + * latency. + * + * Attempt to prefetch the pieces we likely need later. + */ + prefetch(&bdev->bd_disk->part_tbl); + prefetch(bdev->bd_queue); + prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); + + return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_block, end_io, + submit_io, flags); +} + EXPORT_SYMBOL(__blockdev_direct_IO); static __init int dio_init(void) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 828e750af23..aabdfc38cf2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -197,6 +197,12 @@ struct eventpoll { /* The user that created the eventpoll descriptor */ struct user_struct *user; + + struct file *file; + + /* used to optimize loop detection check */ + int visited; + struct list_head visited_list_link; }; /* Wait structure used by the poll hooks */ @@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; +/* Visited nodes during ep_loop_check(), so we can unset them when we finish */ +static LIST_HEAD(visited_list); + +/* + * List of files with newly added links, where we may need to limit the number + * of emanating paths. Protected by the epmutex. + */ +static LIST_HEAD(tfile_check_list); + #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -276,6 +291,12 @@ ctl_table epoll_table[] = { }; #endif /* CONFIG_SYSCTL */ +static const struct file_operations eventpoll_fops; + +static inline int is_file_epoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, @@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = { .llseek = noop_llseek, }; -/* Fast test to see if the file is an eventpoll file */ -static inline int is_file_epoll(struct file *f) -{ - return f->f_op == &eventpoll_fops; -} - /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. We need to have this facility to cleanup correctly files that are @@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + +#define PATH_ARR_SIZE 5 +/* + * These are the number paths of length 1 to 5, that we are allowing to emanate + * from a single file of interest. For example, we allow 1000 paths of length + * 1, to emanate from each file of interest. This essentially represents the + * potential wakeup paths, which need to be limited in order to avoid massive + * uncontrolled wakeup storms. The common use case should be a single ep which + * is connected to n file sources. In this case each file source has 1 path + * of length 1. Thus, the numbers below should be more than sufficient. These + * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify + * and delete can't add additional paths. Protected by the epmutex. + */ +static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; +static int path_count[PATH_ARR_SIZE]; + +static int path_count_inc(int nests) +{ + if (++path_count[nests] > path_limits[nests]) + return -1; + return 0; +} + +static void path_count_init(void) +{ + int i; + + for (i = 0; i < PATH_ARR_SIZE; i++) + path_count[i] = 0; +} + +static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) +{ + int error = 0; + struct file *file = priv; + struct file *child_file; + struct epitem *epi; + + list_for_each_entry(epi, &file->f_ep_links, fllink) { + child_file = epi->ep->file; + if (is_file_epoll(child_file)) { + if (list_empty(&child_file->f_ep_links)) { + if (path_count_inc(call_nests)) { + error = -1; + break; + } + } else { + error = ep_call_nested(&poll_loop_ncalls, + EP_MAX_NESTS, + reverse_path_check_proc, + child_file, child_file, + current); + } + if (error != 0) + break; + } else { + printk(KERN_ERR "reverse_path_check_proc: " + "file is not an ep!\n"); + } + } + return error; +} + +/** + * reverse_path_check - The tfile_check_list is list of file *, which have + * links that are proposed to be newly added. We need to + * make sure that those added links don't add too many + * paths such that we will spend all our time waking up + * eventpoll objects. + * + * Returns: Returns zero if the proposed links don't create too many paths, + * -1 otherwise. + */ +static int reverse_path_check(void) +{ + int length = 0; + int error = 0; + struct file *current_file; + + /* let's call this for all tfiles */ + list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { + length++; + path_count_init(); + error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + reverse_path_check_proc, current_file, + current_file, current); + if (error) + break; + } + return error; +} + /* * Must be called with "mtx" held. */ @@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, */ ep_rbtree_insert(ep, epi); + /* now check if we've created too many backpaths */ + error = -EINVAL; + if (reverse_path_check()) + goto error_remove_epi; + /* We have to drop the new item inside our item list to keep track of it */ spin_lock_irqsave(&ep->lock, flags); @@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, return 0; +error_remove_epi: + spin_lock(&tfile->f_lock); + if (ep_is_linked(&epi->fllink)) + list_del_init(&epi->fllink); + spin_unlock(&tfile->f_lock); + + rb_erase(&epi->rbn, &ep->rbr); + error_unregister: ep_unregister_pollwait(ep, epi); @@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) int error = 0; struct file *file = priv; struct eventpoll *ep = file->private_data; + struct eventpoll *ep_tovisit; struct rb_node *rbp; struct epitem *epi; mutex_lock_nested(&ep->mtx, call_nests + 1); + ep->visited = 1; + list_add(&ep->visited_list_link, &visited_list); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { + ep_tovisit = epi->ffd.file->private_data; + if (ep_tovisit->visited) + continue; error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, - ep_loop_check_proc, epi->ffd.file, - epi->ffd.file->private_data, current); + ep_loop_check_proc, epi->ffd.file, + ep_tovisit, current); if (error != 0) break; + } else { + /* + * If we've reached a file that is not associated with + * an ep, then we need to check if the newly added + * links are going to add too many wakeup paths. We do + * this by adding it to the tfile_check_list, if it's + * not already there, and calling reverse_path_check() + * during ep_insert(). + */ + if (list_empty(&epi->ffd.file->f_tfile_llink)) + list_add(&epi->ffd.file->f_tfile_llink, + &tfile_check_list); } } mutex_unlock(&ep->mtx); @@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + int ret; + struct eventpoll *ep_cur, *ep_next; + + ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, ep_loop_check_proc, file, ep, current); + /* clear visited list */ + list_for_each_entry_safe(ep_cur, ep_next, &visited_list, + visited_list_link) { + ep_cur->visited = 0; + list_del(&ep_cur->visited_list_link); + } + return ret; +} + +static void clear_tfile_check_list(void) +{ + struct file *file; + + /* first clear the tfile_check_list */ + while (!list_empty(&tfile_check_list)) { + file = list_first_entry(&tfile_check_list, struct file, + f_tfile_llink); + list_del_init(&file->f_tfile_llink); + } + INIT_LIST_HEAD(&tfile_check_list); } /* @@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file) */ SYSCALL_DEFINE1(epoll_create1, int, flags) { - int error; + int error, fd; struct eventpoll *ep = NULL; + struct file *file; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); @@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ - error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, + fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); + if (fd < 0) { + error = fd; + goto out_free_ep; + } + file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, O_RDWR | (flags & O_CLOEXEC)); - if (error < 0) - ep_free(ep); - + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto out_free_fd; + } + fd_install(fd, file); + ep->file = file; + return fd; + +out_free_fd: + put_unused_fd(fd); +out_free_ep: + ep_free(ep); return error; } @@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* * When we insert an epoll file descriptor, inside another epoll file * descriptor, there is the change of creating closed loops, which are - * better be handled here, than in more critical paths. + * better be handled here, than in more critical paths. While we are + * checking for loops we also determine the list of files reachable + * and hang them on the tfile_check_list, so we can check that we + * haven't created too many possible wakeup paths. * - * We hold epmutex across the loop check and the insert in this case, in - * order to prevent two separate inserts from racing and each doing the - * insert "at the same time" such that ep_loop_check passes on both - * before either one does the insert, thereby creating a cycle. + * We need to hold the epmutex across both ep_insert and ep_remove + * b/c we want to make sure we are looking at a coherent view of + * epoll network. */ - if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { + if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { mutex_lock(&epmutex); did_lock_epmutex = 1; - error = -ELOOP; - if (ep_loop_check(ep, tfile) != 0) - goto error_tgt_fput; } - + if (op == EPOLL_CTL_ADD) { + if (is_file_epoll(tfile)) { + error = -ELOOP; + if (ep_loop_check(ep, tfile) != 0) + goto error_tgt_fput; + } else + list_add(&tfile->f_tfile_llink, &tfile_check_list); + } mutex_lock_nested(&ep->mtx, 0); @@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, error = ep_insert(ep, &epds, tfile, fd); } else error = -EEXIST; + clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, mutex_unlock(&ep->mtx); error_tgt_fput: - if (unlikely(did_lock_epmutex)) + if (did_lock_epmutex) mutex_unlock(&epmutex); fput(tfile); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 88e8a23d002..376816fcd04 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) spin_lock(&gl->gl_spin); gl->gl_reply = ret; - if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_spin); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2553b858a72..307ac31df78 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -121,8 +121,11 @@ enum { struct lm_lockops { const char *lm_proto_name; - int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); - void (*lm_unmount) (struct gfs2_sbd *sdp); + int (*lm_mount) (struct gfs2_sbd *sdp, const char *table); + void (*lm_first_done) (struct gfs2_sbd *sdp); + void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result); + void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e1d3bb59945..97742a7ea9c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -139,8 +139,45 @@ struct gfs2_bufdata { #define GDLM_STRNAME_BYTES 25 #define GDLM_LVB_SIZE 32 +/* + * ls_recover_flags: + * + * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been + * held by failed nodes whose journals need recovery. Those locks should + * only be used for journal recovery until the journal recovery is done. + * This is set by the dlm recover_prep callback and cleared by the + * gfs2_control thread when journal recovery is complete. To avoid + * races between recover_prep setting and gfs2_control clearing, recover_spin + * is held while changing this bit and reading/writing recover_block + * and recover_start. + * + * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used. + * + * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing + * recovery of all journals before allowing other nodes to mount the fs. + * This is cleared when FIRST_MOUNT_DONE is set. + * + * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished + * recovery of all journals, and now allows other nodes to mount the fs. + * + * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared + * BLOCK_LOCKS for the first time. The gfs2_control thread should now + * control clearing BLOCK_LOCKS for further recoveries. + * + * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq. + * + * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep() + * and recover_done(), i.e. set while recover_block == recover_start. + */ + enum { DFL_BLOCK_LOCKS = 0, + DFL_NO_DLM_OPS = 1, + DFL_FIRST_MOUNT = 2, + DFL_FIRST_MOUNT_DONE = 3, + DFL_MOUNT_DONE = 4, + DFL_UNMOUNT = 5, + DFL_DLM_RECOVERY = 6, }; struct lm_lockname { @@ -392,6 +429,7 @@ struct gfs2_jdesc { #define JDF_RECOVERY 1 unsigned int jd_jid; unsigned int jd_blocks; + int jd_recover_error; }; struct gfs2_statfs_change_host { @@ -461,6 +499,7 @@ enum { SDF_NORECOVERY = 4, SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, + SDF_RORECOVERY = 7, /* read only recovery */ }; #define GFS2_FSNAME_LEN 256 @@ -499,14 +538,26 @@ struct gfs2_sb_host { struct lm_lockstruct { int ls_jid; unsigned int ls_first; - unsigned int ls_first_done; unsigned int ls_nodir; const struct lm_lockops *ls_ops; - unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid_done; - int ls_recover_jid_status; + int ls_recover_jid_done; /* These two are deprecated, */ + int ls_recover_jid_status; /* used previously by gfs_controld */ + + struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ + struct dlm_lksb ls_control_lksb; /* control_lock */ + char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ + struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + + spinlock_t ls_recover_spin; /* protects following fields */ + unsigned long ls_recover_flags; /* DFL_ */ + uint32_t ls_recover_mount; /* gen in first recover_done cb */ + uint32_t ls_recover_start; /* gen in last recover_done cb */ + uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ + uint32_t ls_recover_size; /* size of recover_submit, recover_result */ + uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ + uint32_t *ls_recover_result; /* result of last jid recovery */ }; struct gfs2_sbd { @@ -544,6 +595,7 @@ struct gfs2_sbd { wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; + struct delayed_work sd_control_work; /* Inode Stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 017960cf1d7..a7d611b93f0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -599,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - inc_nlink(&ip->i_inode); - if (S_ISDIR(ip->i_inode.i_mode)) - inc_nlink(&ip->i_inode); + set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index ce85b62bc0a..8944d1e32ab 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright 2004-2011 Red Hat, Inc. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -11,12 +11,15 @@ #include <linux/dlm.h> #include <linux/slab.h> #include <linux/types.h> +#include <linux/delay.h> #include <linux/gfs2_ondisk.h> #include "incore.h" #include "glock.h" #include "util.h" +#include "sys.h" +extern struct workqueue_struct *gfs2_control_wq; static void gdlm_ast(void *arg) { @@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl) dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } -static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) +/* + * dlm/gfs2 recovery coordination using dlm_recover callbacks + * + * 1. dlm_controld sees lockspace members change + * 2. dlm_controld blocks dlm-kernel locking activity + * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) + * 4. dlm_controld starts and finishes its own user level recovery + * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery + * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot) + * 7. dlm_recoverd does its own lock recovery + * 8. dlm_recoverd unblocks dlm-kernel locking activity + * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation) + * 10. gfs2_control updates control_lock lvb with new generation and jid bits + * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none) + * 12. gfs2_recover dequeues and recovers journals of failed nodes + * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result) + * 14. gfs2_control updates control_lock lvb jid bits for recovered journals + * 15. gfs2_control unblocks normal locking when all journals are recovered + * + * - failures during recovery + * + * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control + * clears BLOCK_LOCKS (step 15), e.g. another node fails while still + * recovering for a prior failure. gfs2_control needs a way to detect + * this so it can leave BLOCK_LOCKS set in step 15. This is managed using + * the recover_block and recover_start values. + * + * recover_done() provides a new lockspace generation number each time it + * is called (step 9). This generation number is saved as recover_start. + * When recover_prep() is called, it sets BLOCK_LOCKS and sets + * recover_block = recover_start. So, while recover_block is equal to + * recover_start, BLOCK_LOCKS should remain set. (recover_spin must + * be held around the BLOCK_LOCKS/recover_block/recover_start logic.) + * + * - more specific gfs2 steps in sequence above + * + * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start + * 6. recover_slot records any failed jids (maybe none) + * 9. recover_done sets recover_start = new generation number + * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids + * 12. gfs2_recover does journal recoveries for failed jids identified above + * 14. gfs2_control clears control_lock lvb bits for recovered jids + * 15. gfs2_control checks if recover_block == recover_start (step 3 occured + * again) then do nothing, otherwise if recover_start > recover_block + * then clear BLOCK_LOCKS. + * + * - parallel recovery steps across all nodes + * + * All nodes attempt to update the control_lock lvb with the new generation + * number and jid bits, but only the first to get the control_lock EX will + * do so; others will see that it's already done (lvb already contains new + * generation number.) + * + * . All nodes get the same recover_prep/recover_slot/recover_done callbacks + * . All nodes attempt to set control_lock lvb gen + bits for the new gen + * . One node gets control_lock first and writes the lvb, others see it's done + * . All nodes attempt to recover jids for which they see control_lock bits set + * . One node succeeds for a jid, and that one clears the jid bit in the lvb + * . All nodes will eventually see all lvb bits clear and unblock locks + * + * - is there a problem with clearing an lvb bit that should be set + * and missing a journal recovery? + * + * 1. jid fails + * 2. lvb bit set for step 1 + * 3. jid recovered for step 1 + * 4. jid taken again (new mount) + * 5. jid fails (for step 4) + * 6. lvb bit set for step 5 (will already be set) + * 7. lvb bit cleared for step 3 + * + * This is not a problem because the failure in step 5 does not + * require recovery, because the mount in step 4 could not have + * progressed far enough to unblock locks and access the fs. The + * control_mount() function waits for all recoveries to be complete + * for the latest lockspace generation before ever unblocking locks + * and returning. The mount in step 4 waits until the recovery in + * step 1 is done. + * + * - special case of first mounter: first node to mount the fs + * + * The first node to mount a gfs2 fs needs to check all the journals + * and recover any that need recovery before other nodes are allowed + * to mount the fs. (Others may begin mounting, but they must wait + * for the first mounter to be done before taking locks on the fs + * or accessing the fs.) This has two parts: + * + * 1. The mounted_lock tells a node it's the first to mount the fs. + * Each node holds the mounted_lock in PR while it's mounted. + * Each node tries to acquire the mounted_lock in EX when it mounts. + * If a node is granted the mounted_lock EX it means there are no + * other mounted nodes (no PR locks exist), and it is the first mounter. + * The mounted_lock is demoted to PR when first recovery is done, so + * others will fail to get an EX lock, but will get a PR lock. + * + * 2. The control_lock blocks others in control_mount() while the first + * mounter is doing first mount recovery of all journals. + * A mounting node needs to acquire control_lock in EX mode before + * it can proceed. The first mounter holds control_lock in EX while doing + * the first mount recovery, blocking mounts from other nodes, then demotes + * control_lock to NL when it's done (others_may_mount/first_done), + * allowing other nodes to continue mounting. + * + * first mounter: + * control_lock EX/NOQUEUE success + * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters) + * set first=1 + * do first mounter recovery + * mounted_lock EX->PR + * control_lock EX->NL, write lvb generation + * + * other mounter: + * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry) + * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR) + * mounted_lock PR/NOQUEUE success + * read lvb generation + * control_lock EX->NL + * set first=0 + * + * - mount during recovery + * + * If a node mounts while others are doing recovery (not first mounter), + * the mounting node will get its initial recover_done() callback without + * having seen any previous failures/callbacks. + * + * It must wait for all recoveries preceding its mount to be finished + * before it unblocks locks. It does this by repeating the "other mounter" + * steps above until the lvb generation number is >= its mount generation + * number (from initial recover_done) and all lvb bits are clear. + * + * - control_lock lvb format + * + * 4 bytes generation number: the latest dlm lockspace generation number + * from recover_done callback. Indicates the jid bitmap has been updated + * to reflect all slot failures through that generation. + * 4 bytes unused. + * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates + * that jid N needs recovery. + */ + +#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */ + +static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); + memcpy(&gen, lvb_bits, sizeof(uint32_t)); + *lvb_gen = le32_to_cpu(gen); +} + +static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); + gen = cpu_to_le32(lvb_gen); + memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); +} + +static int all_jid_bits_clear(char *lvb) +{ + int i; + for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { + if (lvb[i]) + return 0; + } + return 1; +} + +static void sync_wait_cb(void *arg) +{ + struct lm_lockstruct *ls = arg; + complete(&ls->ls_sync_wait); +} + +static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (fsname == NULL) { - fs_info(sdp, "no fsname found\n"); - return -EINVAL; + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + if (error) { + fs_err(sdp, "%s lkid %x error %d\n", + name, lksb->sb_lkid, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + if (lksb->sb_status != -DLM_EUNLOCK) { + fs_err(sdp, "%s lkid %x status %d\n", + name, lksb->sb_lkid, lksb->sb_status); + return -1; + } + return 0; +} + +static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, + unsigned int num, struct dlm_lksb *lksb, char *name) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char strname[GDLM_STRNAME_BYTES]; + int error, status; + + memset(strname, 0, GDLM_STRNAME_BYTES); + snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); + + error = dlm_lock(ls->ls_dlm, mode, lksb, flags, + strname, GDLM_STRNAME_BYTES - 1, + 0, sync_wait_cb, ls, NULL); + if (error) { + fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", + name, lksb->sb_lkid, flags, mode, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + status = lksb->sb_status; + + if (status && status != -EAGAIN) { + fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n", + name, lksb->sb_lkid, flags, mode, status); + } + + return status; +} + +static int mounted_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK, + &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int control_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock"); +} + +static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK, + &ls->ls_control_lksb, "control_lock"); +} + +static void gfs2_control_func(struct work_struct *work) +{ + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t block_gen, start_gen, lvb_gen, flags; + int recover_set = 0; + int write_lvb = 0; + int recover_size; + int i, error; + + spin_lock(&ls->ls_recover_spin); + /* + * No MOUNT_DONE means we're still mounting; control_mount() + * will set this flag, after which this thread will take over + * all further clearing of BLOCK_LOCKS. + * + * FIRST_MOUNT means this node is doing first mounter recovery, + * for which recovery control is handled by + * control_mount()/control_first_done(), not this thread. + */ + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + spin_unlock(&ls->ls_recover_spin); + + /* + * Equal block_gen and start_gen implies we are between + * recover_prep and recover_done callbacks, which means + * dlm recovery is in progress and dlm locking is blocked. + * There's no point trying to do any work until recover_done. + */ + + if (block_gen == start_gen) + return; + + /* + * Propagate recover_submit[] and recover_result[] to lvb: + * dlm_recoverd adds to recover_submit[] jids needing recovery + * gfs2_recover adds to recover_result[] journal recovery results + * + * set lvb bit for jids in recover_submit[] if the lvb has not + * yet been updated for the generation of the failure + * + * clear lvb bit for jids in recover_result[] if the result of + * the journal recovery is SUCCESS + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control lock EX error %d\n", error); + return; + } + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + spin_lock(&ls->ls_recover_spin); + if (block_gen != ls->ls_recover_block || + start_gen != ls->ls_recover_start) { + fs_info(sdp, "recover generation %u block1 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + return; + } + + recover_size = ls->ls_recover_size; + + if (lvb_gen <= start_gen) { + /* + * Clear lvb bits for jids we've successfully recovered. + * Because all nodes attempt to recover failed journals, + * a journal can be recovered multiple times successfully + * in succession. Only the first will really do recovery, + * the others find it clean, but still report a successful + * recovery. So, another node may have already recovered + * the jid and cleared the lvb bit for it. + */ + for (i = 0; i < recover_size; i++) { + if (ls->ls_recover_result[i] != LM_RD_SUCCESS) + continue; + + ls->ls_recover_result[i] = 0; + + if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) + continue; + + __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + write_lvb = 1; + } + } + + if (lvb_gen == start_gen) { + /* + * Failed slots before start_gen are already set in lvb. + */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < lvb_gen) + ls->ls_recover_submit[i] = 0; + } + } else if (lvb_gen < start_gen) { + /* + * Failed slots before start_gen are not yet set in lvb. + */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < start_gen) { + ls->ls_recover_submit[i] = 0; + __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + } + } + /* even if there are no bits to set, we need to write the + latest generation to the lvb */ + write_lvb = 1; + } else { + /* + * we should be getting a recover_done() for lvb_gen soon + */ + } + spin_unlock(&ls->ls_recover_spin); + + if (write_lvb) { + control_lvb_write(ls, start_gen, lvb_bits); + flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; + } else { + flags = DLM_LKF_CONVERT; + } + + error = control_lock(sdp, DLM_LOCK_NL, flags); + if (error) { + fs_err(sdp, "control lock NL error %d\n", error); + return; + } + + /* + * Everyone will see jid bits set in the lvb, run gfs2_recover_set(), + * and clear a jid bit in the lvb if the recovery is a success. + * Eventually all journals will be recovered, all jid bits will + * be cleared in the lvb, and everyone will clear BLOCK_LOCKS. + */ + + for (i = 0; i < recover_size; i++) { + if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { + fs_info(sdp, "recover generation %u jid %d\n", + start_gen, i); + gfs2_recover_set(sdp, i); + recover_set++; + } + } + if (recover_set) + return; + + /* + * No more jid bits set in lvb, all recovery is done, unblock locks + * (unless a new recover_prep callback has occured blocking locks + * again while working above) + */ + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_block == block_gen && + ls->ls_recover_start == start_gen) { + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "recover generation %u done\n", start_gen); + gfs2_glock_thaw(sdp); + } else { + fs_info(sdp, "recover generation %u block2 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + } +} + +static int control_mount(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen, mount_gen, lvb_gen; + int mounted_mode; + int retries = 0; + int error; + + memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE); + ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb; + init_completion(&ls->ls_sync_wait); + + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control_mount control_lock NL error %d\n", error); + return error; + } + + error = mounted_lock(sdp, DLM_LOCK_NL, 0); + if (error) { + fs_err(sdp, "control_mount mounted_lock NL error %d\n", error); + control_unlock(sdp); + return error; + } + mounted_mode = DLM_LOCK_NL; + +restart: + if (retries++ && signal_pending(current)) { + error = -EINTR; + goto fail; + } + + /* + * We always start with both locks in NL. control_lock is + * demoted to NL below so we don't need to do it here. + */ + + if (mounted_mode != DLM_LOCK_NL) { + error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + mounted_mode = DLM_LOCK_NL; + } + + /* + * Other nodes need to do some work in dlm recovery and gfs2_control + * before the recover_done and control_lock will be ready for us below. + * A delay here is not required but often avoids having to retry. + */ + + msleep_interruptible(500); + + /* + * Acquire control_lock in EX and mounted_lock in either EX or PR. + * control_lock lvb keeps track of any pending journal recoveries. + * mounted_lock indicates if any other nodes have the fs mounted. + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK); + if (error == -EAGAIN) { + goto restart; + } else if (error) { + fs_err(sdp, "control_mount control_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_EX; + goto locks_done; + } else if (error != -EAGAIN) { + fs_err(sdp, "control_mount mounted_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_PR; + goto locks_done; + } else { + /* not even -EAGAIN should happen here */ + fs_err(sdp, "control_mount mounted_lock PR error %d\n", error); + goto fail; + } + +locks_done: + /* + * If we got both locks above in EX, then we're the first mounter. + * If not, then we need to wait for the control_lock lvb to be + * updated by other mounted nodes to reflect our mount generation. + * + * In simple first mounter cases, first mounter will see zero lvb_gen, + * but in cases where all existing nodes leave/fail before mounting + * nodes finish control_mount, then all nodes will be mounting and + * lvb_gen will be non-zero. + */ + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + if (lvb_gen == 0xFFFFFFFF) { + /* special value to force mount attempts to fail */ + fs_err(sdp, "control_mount control_lock disabled\n"); + error = -EINVAL; + goto fail; + } + + if (mounted_mode == DLM_LOCK_EX) { + /* first mounter, keep both EX while doing first recovery */ + spin_lock(&ls->ls_recover_spin); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "first mounter control generation %u\n", lvb_gen); + return 0; + } + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + + /* + * We are not first mounter, now we need to wait for the control_lock + * lvb generation to be >= the generation from our first recover_done + * and all lvb bits to be clear (no pending journal recoveries.) + */ + + if (!all_jid_bits_clear(lvb_bits)) { + /* journals need recovery, wait until all are clear */ + fs_info(sdp, "control_mount wait for journal recovery\n"); + goto restart; + } + + spin_lock(&ls->ls_recover_spin); + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + mount_gen = ls->ls_recover_mount; + + if (lvb_gen < mount_gen) { + /* wait for mounted nodes to update control_lock lvb to our + generation, which might include new recovery bits set */ + fs_info(sdp, "control_mount wait1 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (lvb_gen != start_gen) { + /* wait for mounted nodes to update control_lock lvb to the + latest recovery generation */ + fs_info(sdp, "control_mount wait2 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (block_gen == start_gen) { + /* dlm recovery in progress, wait for it to finish */ + fs_info(sdp, "control_mount wait3 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; } - error = dlm_new_lockspace(fsname, NULL, - DLM_LSFL_FS | DLM_LSFL_NEWEXCL | - (ls->ls_nodir ? DLM_LSFL_NODIR : 0), - GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + return 0; + +fail: + mounted_unlock(sdp); + control_unlock(sdp); + return error; +} + +static int dlm_recovery_wait(void *word) +{ + schedule(); + return 0; +} + +static int control_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen; + int error; + +restart: + spin_lock(&ls->ls_recover_spin); + start_gen = ls->ls_recover_start; + block_gen = ls->ls_recover_block; + + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) || + !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + /* sanity check, should not happen */ + fs_err(sdp, "control_first_done start %u block %u flags %lx\n", + start_gen, block_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + control_unlock(sdp); + return -1; + } + + if (start_gen == block_gen) { + /* + * Wait for the end of a dlm recovery cycle to switch from + * first mounter recovery. We can ignore any recover_slot + * callbacks between the recover_prep and next recover_done + * because we are still the first mounter and any failed nodes + * have not fully mounted, so they don't need recovery. + */ + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "control_first_done wait gen %u\n", start_gen); + + wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, + dlm_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + + clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + + memset(lvb_bits, 0, sizeof(lvb_bits)); + control_lvb_write(ls, start_gen, lvb_bits); + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); + if (error) + fs_err(sdp, "control_first_done mounted PR error %d\n", error); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK); if (error) - printk(KERN_ERR "dlm_new_lockspace error %d", error); + fs_err(sdp, "control_first_done control NL error %d\n", error); return error; } +/* + * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC) + * to accomodate the largest slot number. (NB dlm slot numbers start at 1, + * gfs2 jids start at 0, so jid = slot - 1) + */ + +#define RECOVER_SIZE_INC 16 + +static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, + int num_slots) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + uint32_t *submit = NULL; + uint32_t *result = NULL; + uint32_t old_size, new_size; + int i, max_jid; + + max_jid = 0; + for (i = 0; i < num_slots; i++) { + if (max_jid < slots[i].slot - 1) + max_jid = slots[i].slot - 1; + } + + old_size = ls->ls_recover_size; + + if (old_size >= max_jid + 1) + return 0; + + new_size = old_size + RECOVER_SIZE_INC; + + submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + if (!submit || !result) { + kfree(submit); + kfree(result); + return -ENOMEM; + } + + spin_lock(&ls->ls_recover_spin); + memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t)); + memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t)); + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = submit; + ls->ls_recover_result = result; + ls->ls_recover_size = new_size; + spin_unlock(&ls->ls_recover_spin); + return 0; +} + +static void free_recover_size(struct lm_lockstruct *ls) +{ + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + ls->ls_recover_size = 0; +} + +/* dlm calls before it does lock recovery */ + +static void gdlm_recover_prep(void *arg) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_block = ls->ls_recover_start; + set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_prep has been completed on all lockspace members; + identifies slot/jid of failed member */ + +static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int jid = slot->slot - 1; + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recover_slot jid %d gen %u short size %d", + jid, ls->ls_recover_block, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + if (ls->ls_recover_submit[jid]) { + fs_info(sdp, "recover_slot jid %d gen %u prev %u", + jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); + } + ls->ls_recover_submit[jid] = ls->ls_recover_block; + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_slot and after it completes lock recovery */ + +static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, + int our_slot, uint32_t generation) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + /* ensure the ls jid arrays are large enough */ + set_recover_size(sdp, slots, num_slots); + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_start = generation; + + if (!ls->ls_recover_mount) { + ls->ls_recover_mount = generation; + ls->ls_jid = our_slot - 1; + } + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); + + clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); + spin_unlock(&ls->ls_recover_spin); +} + +/* gfs2_recover thread has a journal recovery result */ + +static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + /* don't care about the recovery of own journal during mount */ + if (jid == ls->ls_jid) + return; + + spin_lock(&ls->ls_recover_spin); + if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recovery_result jid %d short size %d", + jid, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + fs_info(sdp, "recover jid %d result %s\n", jid, + result == LM_RD_GAVEUP ? "busy" : "success"); + + ls->ls_recover_result[jid] = result; + + /* GAVEUP means another node is recovering the journal; delay our + next attempt to recover it, to give the other node a chance to + finish before trying again */ + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, + result == LM_RD_GAVEUP ? HZ : 0); + spin_unlock(&ls->ls_recover_spin); +} + +const struct dlm_lockspace_ops gdlm_lockspace_ops = { + .recover_prep = gdlm_recover_prep, + .recover_slot = gdlm_recover_slot, + .recover_done = gdlm_recover_done, +}; + +static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char cluster[GFS2_LOCKNAME_LEN]; + const char *fsname; + uint32_t flags; + int error, ops_result; + + /* + * initialize everything + */ + + INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); + spin_lock_init(&ls->ls_recover_spin); + ls->ls_recover_flags = 0; + ls->ls_recover_mount = 0; + ls->ls_recover_start = 0; + ls->ls_recover_block = 0; + ls->ls_recover_size = 0; + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + + error = set_recover_size(sdp, NULL, 0); + if (error) + goto fail; + + /* + * prepare dlm_new_lockspace args + */ + + fsname = strchr(table, ':'); + if (!fsname) { + fs_info(sdp, "no fsname found\n"); + error = -EINVAL; + goto fail_free; + } + memset(cluster, 0, sizeof(cluster)); + memcpy(cluster, table, strlen(table) - strlen(fsname)); + fsname++; + + flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; + if (ls->ls_nodir) + flags |= DLM_LSFL_NODIR; + + /* + * create/join lockspace + */ + + error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, + &gdlm_lockspace_ops, sdp, &ops_result, + &ls->ls_dlm); + if (error) { + fs_err(sdp, "dlm_new_lockspace error %d\n", error); + goto fail_free; + } + + if (ops_result < 0) { + /* + * dlm does not support ops callbacks, + * old dlm_controld/gfs_controld are used, try without ops. + */ + fs_info(sdp, "dlm lockspace ops not used\n"); + free_recover_size(ls); + set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags); + return 0; + } + + if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) { + fs_err(sdp, "dlm lockspace ops disallow jid preset\n"); + error = -EINVAL; + goto fail_release; + } + + /* + * control_mount() uses control_lock to determine first mounter, + * and for later mounts, waits for any recoveries to be cleared. + */ + + error = control_mount(sdp); + if (error) { + fs_err(sdp, "mount control error %d\n", error); + goto fail_release; + } + + ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + return 0; + +fail_release: + dlm_release_lockspace(ls->ls_dlm, 2); +fail_free: + free_recover_size(ls); +fail: + return error; +} + +static void gdlm_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int error; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + error = control_first_done(sdp); + if (error) + fs_err(sdp, "mount first_done error %d\n", error); +} + static void gdlm_unmount(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + goto release; + + /* wait for gfs2_control_wq to be done with this mount */ + + spin_lock(&ls->ls_recover_spin); + set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + flush_delayed_work_sync(&sdp->sd_control_work); + + /* mounted_lock and control_lock will be purged in dlm recovery */ +release: if (ls->ls_dlm) { dlm_release_lockspace(ls->ls_dlm, 2); ls->ls_dlm = NULL; } + + free_recover_size(ls); } static const match_table_t dlm_tokens = { @@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = { const struct lm_lockops gfs2_dlm_ops = { .lm_proto_name = "lock_dlm", .lm_mount = gdlm_mount, + .lm_first_done = gdlm_first_done, + .lm_recovery_result = gdlm_recovery_result, .lm_unmount = gdlm_unmount, .lm_put_lock = gdlm_put_lock, .lm_lock = gdlm_lock, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c150298e2d8..a8d9bcd0e19 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -28,6 +28,8 @@ #include "recovery.h" #include "dir.h" +struct workqueue_struct *gfs2_control_wq; + static struct shrinker qd_shrinker = { .shrink = gfs2_shrink_qd_memory, .seeks = DEFAULT_SEEKS, @@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void) if (!gfs_recovery_wq) goto fail_wq; + gfs2_control_wq = alloc_workqueue("gfs2_control", + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); + if (!gfs2_control_wq) + goto fail_control; + gfs2_register_debugfs(); printk("GFS2 installed\n"); return 0; +fail_control: + destroy_workqueue(gfs_recovery_wq); fail_wq: unregister_filesystem(&gfs2meta_fs_type); fail_unregister: @@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); + destroy_workqueue(gfs2_control_wq); rcu_barrier(); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index fe72e79e6ff..6aacf3f230a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp) { char *message = "FIRSTMOUNT=Done"; char *envp[] = { message, NULL }; - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_first_done = 1; + + fs_info(sdp, "first mount done, others may mount\n"); + + if (sdp->sd_lockstruct.ls_ops->lm_first_done) + sdp->sd_lockstruct.ls_ops->lm_first_done(sdp); + kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } @@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) struct gfs2_args *args = &sdp->sd_args; const char *proto = sdp->sd_proto_name; const char *table = sdp->sd_table_name; - const char *fsname; char *o, *options; int ret; @@ -1004,21 +1007,12 @@ hostdata_error: } } - if (sdp->sd_args.ar_spectator) - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); - else - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, - sdp->sd_lockstruct.ls_jid); - - fsname = strchr(table, ':'); - if (fsname) - fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); return 0; } - ret = lm->lm_mount(sdp, fsname); + ret = lm->lm_mount(sdp, table); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); @@ -1084,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; - set_bit(SDF_NORECOVERY, &sdp->sd_flags); + set_bit(SDF_RORECOVERY, &sdp->sd_flags); } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; @@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail; + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); + gfs2_create_debugfs_file(sdp); error = gfs2_sys_fs_add(sdp); @@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_sb; } + if (sdp->sd_args.ar_spectator) + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", + sdp->sd_table_name); + else + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", + sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); + error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f2a02edcac8..963b2d75200 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, char env_status[20]; char *envp[] = { env_jid, env_status, NULL }; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + ls->ls_recover_jid_done = jid; ls->ls_recover_jid_status = message; sprintf(env_jid, "JID=%d", jid); sprintf(env_status, "RECOVERY=%s", message == LM_RD_SUCCESS ? "Done" : "Failed"); kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); + + if (sdp->sd_lockstruct.ls_ops->lm_recovery_result) + sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message); } void gfs2_recover_func(struct work_struct *work) @@ -512,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work) if (error) goto fail_gunlock_ji; - if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { + if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { + ro = 1; + } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { @@ -577,6 +583,7 @@ fail_gunlock_j: fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); fail: + jd->jd_recover_error = error; gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); @@ -605,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); - return 0; + return wait ? jd->jd_recover_error : 0; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 22234627f68..981bfa32121 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1108,9 +1108,9 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - gfs2_blkrsv_put(ip); if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + gfs2_blkrsv_put(ip); } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 443cabcfcd2..d33172c291b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) ssize_t ret; int val = 0; - if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags)) + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) val = 1; ret = sprintf(buf, "%d\n", val); return ret; @@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) val = simple_strtol(buf, NULL, 0); if (val == 1) - set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { - clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); smp_mb__after_clear_bit(); gfs2_glock_thaw(sdp); } else { @@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) goto out; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) goto out; - sdp->sd_lockstruct.ls_first = first; - rv = 0; + sdp->sd_lockstruct.ls_first = first; + rv = 0; out: spin_unlock(&sdp->sd_jindex_spin); return rv ? rv : len; @@ -360,19 +360,14 @@ out: static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_first_done); + return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); } -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) { - unsigned jid; struct gfs2_jdesc *jd; int rv; - rv = sscanf(buf, "%u", &jid); - if (rv != 1) - return -EINVAL; - rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) @@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) } out: spin_unlock(&sdp->sd_jindex_spin); + return rv; +} + +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned jid; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + rv = gfs2_recover_set(sdp, jid); + return rv ? rv : len; } diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h index e94560e836d..79182d6ad6a 100644 --- a/fs/gfs2/sys.h +++ b/fs/gfs2/sys.h @@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp); int gfs2_sys_init(void); void gfs2_sys_uninit(void); +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid); + #endif /* __SYS_DOT_H__ */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e425ad9d049..1e85a7ac021 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page) } static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ee92538b06..8102db9b926 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); #else #define nfs_migrate_page NULL #endif diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0c3885255f9..834f0fe96f8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1688,7 +1688,7 @@ out_error: #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page) + struct page *page, enum migrate_mode mode) { /* * If PagePrivate is set, then the page is currently associated with @@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, nfs_fscache_release_page(page, GFP_KERNEL); - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366608f..8df1ea4a6ff 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -80,3 +80,13 @@ config NFSD_V4 available from http://linux-nfs.org/. If unsure, say N. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee2019..af32ef06b4f 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e8..cf8a6bd062f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mlen < 1 || mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; @@ -1226,12 +1226,12 @@ nfsd_export_init(void) int rv; dprintk("nfsd: initializing export module.\n"); - rv = cache_register(&svc_export_cache); + rv = cache_register_net(&svc_export_cache, &init_net); if (rv) return rv; - rv = cache_register(&svc_expkey_cache); + rv = cache_register_net(&svc_expkey_cache, &init_net); if (rv) - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_export_cache, &init_net); return rv; } @@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - cache_unregister(&svc_expkey_cache); - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_expkey_cache, &init_net); + cache_unregister_net(&svc_export_cache, &init_net); svcauth_unix_purge(); dprintk("nfsd: export shutdown complete.\n"); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 00000000000..ce7f0758d84 --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Uses debugfs to create fault injection points for client testing + */ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { + char *file; + void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .func = nfsd_forget_clients, + }, + { + .file = "forget_locks", + .func = nfsd_forget_locks, + }, + { + .file = "forget_openowners", + .func = nfsd_forget_openowners, + }, + { + .file = "forget_delegations", + .func = nfsd_forget_delegations, + }, + { + .file = "recall_delegations", + .func = nfsd_recall_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ + struct nfsd_fault_inject_op *op = op_ptr; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + op->func(val); + return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 00000000000..90bd0570956 --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a22fdb..94096273cd6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@ #include <linux/seq_file.h> #include <linux/sched.h> #include <linux/slab.h> +#include <net/net_namespace.h> #include "idmap.h" #include "nfsd.h" @@ -466,20 +467,20 @@ nfsd_idmap_init(void) { int rv; - rv = cache_register(&idtoname_cache); + rv = cache_register_net(&idtoname_cache, &init_net); if (rv) return rv; - rv = cache_register(&nametoid_cache); + rv = cache_register_net(&nametoid_cache, &init_net); if (rv) - cache_unregister(&idtoname_cache); + cache_unregister_net(&idtoname_cache, &init_net); return rv; } void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + cache_unregister_net(&idtoname_cache, &init_net); + cache_unregister_net(&nametoid_cache, &init_net); } static int diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c5e28ed8bca..896da74ec56 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ { __be32 status; - /* Only reclaims from previously confirmed clients are valid */ - if ((status = nfs4_check_open_reclaim(&open->op_clientid))) - return status; - /* We don't know the target directory, and therefore can not * set the change info */ @@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + status = nfs4_check_open_reclaim(&open->op_clientid); + if (status) + goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, &cstate->current_fh, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 80a0be9ed00..0b3e875d1ab 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -117,8 +117,7 @@ out_no_tfm: return status; } -int -nfsd4_create_clid_dir(struct nfs4_client *clp) +void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char *dname = clp->cl_recdir; @@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (!rec_file || clp->cl_firststate) - return 0; - + if (clp->cl_firststate) + return; clp->cl_firststate = 1; + if (!rec_file) + return; status = nfs4_save_creds(&original_cred); if (status < 0) - return status; + return; dir = rec_file->f_path.dentry; /* lock the parent */ @@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = PTR_ERR(dentry); goto out_unlock; } - status = -EEXIST; if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ goto out_put; status = mnt_want_write_file(rec_file); if (status) @@ -164,7 +171,6 @@ out_unlock: " and is writeable", status, user_recovery_dirname); nfs4_reset_creds(original_cred); - return status; } typedef int (recdir_func)(struct dentry *, struct dentry *); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ca16dc09e0..e8c98f00967 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,12 +49,20 @@ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; static time_t boot_time; -static stateid_t zerostateid; /* bits all 0 */ -static stateid_t onestateid; /* bits all 1 */ + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; + static u64 current_sessionid = 1; -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -133,21 +141,21 @@ unsigned int max_delegations; * Open owner state (share locks) */ -/* hash tables for open owners */ -#define OPEN_OWNER_HASH_BITS 8 -#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) -#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); ret += clientid; - return ret & OPEN_OWNER_HASH_MASK; + return ret & OWNER_HASH_MASK; } -static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 @@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) list_del(&lo->lo_owner.so_strhash); list_del(&lo->lo_perstateid); + list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); + clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); if (clp->cl_name.data == NULL) { kfree(clp); return NULL; } - memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; return clp; } @@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp) spin_unlock(&recall_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { @@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void) nfsd4_free_slab(&deleg_slab); } -static int +int nfsd4_init_slabs(void) { openowner_slab = kmem_cache_create("nfsd4_openowners", @@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) struct nfs4_stateowner *so; struct nfs4_openowner *oo; - list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { oo = openowner(so); renew_client(oo->oo_owner.so_client); @@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); oo = find_openstateowner_str(strhashval, open); open->op_openowner = oo; if (!oo) { @@ -3123,7 +3132,6 @@ nfs4_laundromat(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } test_val = nfsd4_lease; @@ -3718,13 +3726,11 @@ out: } -/* - * Lock owner state (byte-range locks) - */ #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS 8 -#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) static inline u64 end_offset(u64 start, u64 len) @@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, - struct xdr_netobj *ownername) +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) { return (file_hashval(inode) + cl_id + opaque_hashval(ownername->data, ownername->len)) - & LOCK_HASH_MASK; + & LOCKOWNER_INO_HASH_MASK; } -static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that @@ -3809,23 +3813,39 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); - struct nfs4_stateowner *op; + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; - list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return lockowner(op); + list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { - list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * - * strhashval = lock_ownerstr_hashval + * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * @@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) __set_bit(access, &lock_stp->st_access_bmap); } +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + /* * LOCK operation */ @@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock file_lock; struct file_lock conflock; __be32 status = 0; - unsigned int strhashval; + bool new_state = false; int lkflg; int err; @@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * lock stateid. */ struct nfs4_ol_stateid *open_stp = NULL; - + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && - STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid)) goto out; /* validate and update open stateid and open seqid */ @@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; - if (!nfsd4_has_session(cstate) && - !same_clid(&open_sop->oo_owner.so_client->cl_clientid, + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; - /* create lockowner and lock stateid */ - fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->oo_owner.so_client->cl_clientid.cl_id, - &lock->v.new.owner); - /* XXX: Do we need to check for duplicate stateowners on - * the same file, or should they just be allowed (and - * create new stateids)? */ - status = nfserr_jukebox; - lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->oo_owner.so_client, open_stp, lock); - if (lock_sop == NULL) - goto out; - lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); - if (lock_stp == NULL) + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + if (status) goto out; } else { /* lock (lock owner + lock stateid) already exists */ @@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_LOCK_STID, &lock_stp); if (status) goto out; - lock_sop = lockowner(lock_stp->st_stateowner); - fp = lock_stp->st_file; } - /* lock_sop and lock_stp have been created or found */ + lock_sop = lockowner(lock_stp->st_stateowner); + fp = lock_stp->st_file; lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); @@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && lock->lk_is_new && lock_sop) + if (status && new_state) release_lockowner(lock_sop); if (!cstate->replay_owner) nfs4_unlock_state(); @@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - int i; + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfserr_locks_held; - /* XXX: we're doing a linear search through all the lockowners. - * Yipes! For now we'll just hope clients aren't really using - * release_lockowner much, but eventually we have to fix these - * data structures. */ INIT_LIST_HEAD(&matches); - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); - } + + list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); } } /* Clients probably won't expect us to return with some (but not all) @@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid) return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; } +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ + struct nfs4_client *clp, *next; + int count = 0; + + nfs4_lock_state(); + list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { + nfsd4_remove_clid_dir(clp); + expire_client(clp); + if (++count == num) + break; + } + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ + release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ + release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, bool is_open_owner, + void (*release_sop)(struct nfs4_stateowner *)) +{ + int i, count = 0; + struct nfs4_stateowner *sop, *next; + + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { + if (sop->so_is_open_owner != is_open_owner) + continue; + release_sop(sop); + if (++count == num) + return count; + } + } + return count; +} + +void nfsd_forget_locks(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, false, release_lockowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, true, release_openowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ + int i, count = 0; + struct nfs4_file *fp, *fnext; + struct nfs4_delegation *dp, *dnext; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { + deleg_func(dp); + if (++count == num) + return count; + } + } + } + + return count; +} + +void nfsd_forget_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + count = nfsd_process_n_delegations(num, unhash_delegation); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + spin_unlock(&recall_lock); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* initialization to perform at module load time: */ -int +void nfs4_state_init(void) { - int i, status; + int i; - status = nfsd4_init_slabs(); - if (status) - return status; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -4416,18 +4566,15 @@ nfs4_state_init(void) for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } - memset(&onestateid, ~0, sizeof(stateid_t)); + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); reclaim_str_hashtbl_size = 0; - return 0; } static void @@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792d6b8..0ec5a1b9700 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp, static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); if (!p) return NULL; - memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bb4a11d58a5..748eda93ce5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -18,6 +18,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" +#include "fault_inject.h" /* * We have a single directory with several nodes in it. @@ -1128,9 +1129,13 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfs4_state_init(); /* nfs4 locking state */ + retval = nfsd4_init_slabs(); if (retval) return retval; + nfs4_state_init(); + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; nfsd_stat_init(); /* Statistics */ retval = nfsd_reply_cache_init(); if (retval) @@ -1161,6 +1166,8 @@ out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: nfsd4_free_slabs(); return retval; } @@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd_idmap_shutdown(); nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a23fdf..1d1e8589b4c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned int max_delegations; -int nfs4_state_init(void); +void nfs4_state_init(void); +int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); #else -static inline int nfs4_state_init(void) { return 0; } +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } @@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ -#define NFSD_WRITEONLY_ATTRS_WORD1 \ -(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ -#define NFSD_WRITEABLE_ATTRS_WORD0 \ -(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) -#define NFSD_WRITEABLE_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #define NFSD_WRITEABLE_ATTRS_WORD2 0 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf38476a1..ffb5df1db94 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -366,6 +366,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_owner_ino_hash; /* hash by owner,file */ struct list_head lo_perstateid; /* for lockowners only */ struct list_head lo_list; /* for temporary uses */ }; @@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d25a723b68a..edf6d3ed877 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } -#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." -#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */ int nfsd4_is_junction(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) + if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; } diff --git a/fs/pipe.c b/fs/pipe.c index f0e485d54e6..a932ced92a1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1137,7 +1137,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) if (nr_pages < pipe->nrbufs) return -EBUSY; - bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); + bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!bufs)) return -ENOMEM; diff --git a/fs/proc/array.c b/fs/proc/array.c index 8c344f037bd..9252ee3b71e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", pid_nr_ns(pid, ns), tcomm, state, @@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, task->policy, (unsigned long long)delayacct_blkio_ticks(task), cputime_to_clock_t(gtime), - cputime_to_clock_t(cgtime)); + cputime_to_clock_t(cgtime), + (mm && permitted) ? mm->start_data : 0, + (mm && permitted) ? mm->end_data : 0, + (mm && permitted) ? mm->start_brk : 0); if (mm) mmput(mm); return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index 8173dfd89cb..5485a5388ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -654,6 +654,8 @@ static int proc_pid_permission(struct inode *inode, int mask) bool has_perms; task = get_proc_task(inode); + if (!task) + return -ESRCH; has_perms = has_pid_permissions(pid, task, 1); put_task_struct(task); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index f744be98cd5..af0b7380259 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -70,11 +70,15 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, spin_lock(&cache->lock); while (1) { - for (i = 0; i < cache->entries; i++) - if (cache->entry[i].block == block) + for (i = cache->curr_blk, n = 0; n < cache->entries; n++) { + if (cache->entry[i].block == block) { + cache->curr_blk = i; break; + } + i = (i + 1) % cache->entries; + } - if (i == cache->entries) { + if (n == cache->entries) { /* * Block not in cache, if all cache entries are used * go to sleep waiting for one to become available. @@ -245,6 +249,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } + cache->curr_blk = 0; cache->next_blk = 0; cache->unused = entries; cache->entries = entries; @@ -332,17 +337,20 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, u64 *block, int *offset, int length) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int bytes, copied = length; + int bytes, res = length; struct squashfs_cache_entry *entry; TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); - if (entry->error) - return entry->error; - else if (*offset >= entry->length) - return -EIO; + if (entry->error) { + res = entry->error; + goto error; + } else if (*offset >= entry->length) { + res = -EIO; + goto error; + } bytes = squashfs_copy_data(buffer, entry, *offset, length); if (buffer) @@ -358,7 +366,11 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, squashfs_cache_put(entry); } - return copied; + return res; + +error: + squashfs_cache_put(entry); + return res; } diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index fd7b3b3bda1..81afbccfa84 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -208,8 +208,8 @@ int squashfs_read_inode(struct inode *inode, long long ino) inode->i_op = &squashfs_inode_ops; inode->i_fop = &generic_ro_fops; inode->i_mode |= S_IFREG; - inode->i_blocks = ((inode->i_size - - le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1; + inode->i_blocks = (inode->i_size - + le64_to_cpu(sqsh_ino->sparse) + 511) >> 9; squashfs_i(inode)->fragment_block = frag_blk; squashfs_i(inode)->fragment_size = frag_size; diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 651f0b31d29..52934a22f29 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -28,6 +28,7 @@ struct squashfs_cache { char *name; int entries; + int curr_blk; int next_blk; int num_waiters; int unused; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index d0858c2d9a4..ecaa2f7bdb8 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -290,7 +290,7 @@ handle_fragments: check_directory_table: /* Sanity check directory_table */ - if (msblk->directory_table >= next_table) { + if (msblk->directory_table > next_table) { err = -EINVAL; goto failed_mount; } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b09ba2dd8b6..f922cbacdb9 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -38,9 +38,6 @@ DEFINE_SPINLOCK(dbg_lock); -static char dbg_key_buf0[128]; -static char dbg_key_buf1[128]; - static const char *get_key_fmt(int fmt) { switch (fmt) { @@ -103,8 +100,8 @@ static const char *get_dent_type(int type) } } -static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, - char *buffer) +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len) { char *p = buffer; int type = key_type(c, key); @@ -112,45 +109,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { switch (type) { case UBIFS_INO_KEY: - sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; case UBIFS_DENT_KEY: case UBIFS_XENT_KEY: - sprintf(p, "(%lu, %s, %#08x)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_hash(c, key)); + len -= snprintf(p, len, "(%lu, %s, %#08x)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_hash(c, key)); break; case UBIFS_DATA_KEY: - sprintf(p, "(%lu, %s, %u)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_block(c, key)); + len -= snprintf(p, len, "(%lu, %s, %u)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_block(c, key)); break; case UBIFS_TRUN_KEY: - sprintf(p, "(%lu, %s)", - (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; default: - sprintf(p, "(bad key type: %#08x, %#08x)", - key->u32[0], key->u32[1]); + len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); } } else - sprintf(p, "bad key format %d", c->key_fmt); -} - -const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) -{ - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf0); - return dbg_key_buf0; -} - -const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) -{ - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf1); - return dbg_key_buf1; + len -= snprintf(p, len, "bad key format %d", c->key_fmt); + ubifs_assert(len > 0); + return p; } const char *dbg_ntype(int type) @@ -319,6 +305,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int i, n; union ubifs_key key; const struct ubifs_ch *ch = node; + char key_buf[DBG_KEY_BUF_LEN]; if (dbg_is_tst_rcvry(c)) return; @@ -474,7 +461,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_ino_node *ino = node; key_read(c, &ino->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); printk(KERN_DEBUG "\tcreat_sqnum %llu\n", (unsigned long long)le64_to_cpu(ino->creat_sqnum)); printk(KERN_DEBUG "\tsize %llu\n", @@ -517,7 +505,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int nlen = le16_to_cpu(dent->nlen); key_read(c, &dent->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); printk(KERN_DEBUG "\tinum %llu\n", (unsigned long long)le64_to_cpu(dent->inum)); printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); @@ -541,7 +530,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); printk(KERN_DEBUG "\tsize %u\n", le32_to_cpu(dn->size)); printk(KERN_DEBUG "\tcompr_typ %d\n", @@ -582,7 +572,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) key_read(c, &br->key, &key); printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), - le32_to_cpu(br->len), DBGKEY(&key)); + le32_to_cpu(br->len), + dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); } break; } @@ -934,6 +926,7 @@ void dbg_dump_znode(const struct ubifs_info *c, { int n; const struct ubifs_zbranch *zbr; + char key_buf[DBG_KEY_BUF_LEN]; spin_lock(&dbg_lock); if (znode->parent) @@ -958,12 +951,16 @@ void dbg_dump_znode(const struct ubifs_info *c, printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " "%s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, - DBGKEY(&zbr->key)); + dbg_snprintf_key(c, &zbr->key, + key_buf, + DBG_KEY_BUF_LEN)); else printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " "%s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, - DBGKEY(&zbr->key)); + dbg_snprintf_key(c, &zbr->key, + key_buf, + DBG_KEY_BUF_LEN)); } spin_unlock(&dbg_lock); } @@ -1260,6 +1257,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, int err, nlen1, nlen2, cmp; struct ubifs_dent_node *dent1, *dent2; union ubifs_key key; + char key_buf[DBG_KEY_BUF_LEN]; ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); @@ -1290,9 +1288,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, key_read(c, &dent1->key, &key); if (keys_cmp(c, &zbr1->key, &key)) { dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, - zbr1->offs, DBGKEY(&key)); + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); dbg_err("but it should have key %s according to tnc", - DBGKEY(&zbr1->key)); + dbg_snprintf_key(c, &zbr1->key, key_buf, + DBG_KEY_BUF_LEN)); dbg_dump_node(c, dent1); goto out_free; } @@ -1300,9 +1300,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, key_read(c, &dent2->key, &key); if (keys_cmp(c, &zbr2->key, &key)) { dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, - zbr1->offs, DBGKEY(&key)); + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); dbg_err("but it should have key %s according to tnc", - DBGKEY(&zbr2->key)); + dbg_snprintf_key(c, &zbr2->key, key_buf, + DBG_KEY_BUF_LEN)); dbg_dump_node(c, dent2); goto out_free; } @@ -1319,7 +1321,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_err("2 xent/dent nodes with the same name"); else dbg_err("bad order of colliding key %s", - DBGKEY(&key)); + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); dbg_dump_node(c, dent1); diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8d9c4681018..307ab1d23f7 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -169,40 +169,39 @@ struct ubifs_global_debug_info { spin_unlock(&dbg_lock); \ } while (0) -const char *dbg_key_str0(const struct ubifs_info *c, - const union ubifs_key *key); -const char *dbg_key_str1(const struct ubifs_info *c, - const union ubifs_key *key); - -/* - * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message - * macros. - */ -#define DBGKEY(key) dbg_key_str0(c, (key)) -#define DBGKEY1(key) dbg_key_str1(c, (key)) - -extern spinlock_t dbg_lock; - -#define ubifs_dbg_msg(type, fmt, ...) do { \ - spin_lock(&dbg_lock); \ - pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) + +#define DBG_KEY_BUF_LEN 32 +#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ + char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ + pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ + dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ } while (0) /* Just a debugging messages not related to any specific UBIFS subsystem */ -#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__) + /* General messages */ #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) +#define dbg_jnlk(key, fmt, ...) \ + ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) /* Additional TNC messages */ #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) +#define dbg_tnck(key, fmt, ...) \ + ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) /* Additional lprops messages */ #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) /* Additional LEB find messages */ #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) /* Additional mount messages */ #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) +#define dbg_mntk(key, fmt, ...) \ + ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) /* Additional I/O messages */ #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) /* Additional commit messages */ @@ -218,6 +217,7 @@ extern spinlock_t dbg_lock; /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) +extern spinlock_t dbg_lock; extern struct ubifs_global_debug_info ubifs_dbg; static inline int dbg_is_chk_gen(const struct ubifs_info *c) @@ -258,6 +258,8 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len); void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, @@ -368,6 +370,10 @@ static inline const char *dbg_jhead(int jhead) { return ""; } static inline const char * dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key) { return ""; } +static inline const char * +dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, + int len) { return ""; } static inline void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) { return; } static inline void dbg_dump_node(const struct ubifs_info *c, diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index cef0460f4c5..2f438ab2e7a 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -697,9 +697,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; struct ubifs_inode *ui = ubifs_inode(inode); - dbg_jnl("ino %lu, blk %u, len %d, key %s", - (unsigned long)key_inum(c, key), key_block(c, key), len, - DBGKEY(key)); + dbg_jnlk(key, "ino %lu, blk %u, len %d, key ", + (unsigned long)key_inum(c, key), key_block(c, key), len); ubifs_assert(len <= UBIFS_BLOCK_SIZE); data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); @@ -1177,7 +1176,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, dn = (void *)trun + UBIFS_TRUN_NODE_SZ; blk = new_size >> UBIFS_BLOCK_SHIFT; data_key_init(c, &key, inum, blk); - dbg_jnl("last block key %s", DBGKEY(&key)); + dbg_jnlk(&key, "last block key "); err = ubifs_tnc_lookup(c, &key, dn); if (err == -ENOENT) dlen = 0; /* Not found (so it is a hole) */ diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ccabaf1164b..b007637f040 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -221,8 +221,8 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) { int err; - dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, - r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); + dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", + r->lnum, r->offs, r->len, r->deletion, r->sqnum); /* Set c->replay_sqnum to help deal with dangling branches. */ c->replay_sqnum = r->sqnum; @@ -361,7 +361,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, { struct replay_entry *r; - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); @@ -409,7 +409,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, struct replay_entry *r; char *nbuf; - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e14ee53159d..16ad84d8402 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -505,7 +505,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, { int ret; - dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs); ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, zbr->offs); @@ -519,8 +519,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, ret = 0; } if (ret == 0 && c->replaying) - dbg_mnt("dangling branch LEB %d:%d len %d, key %s", - zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", + zbr->lnum, zbr->offs, zbr->len); return ret; } @@ -995,9 +995,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, if (adding || !o_znode) return 0; - dbg_mnt("dangling match LEB %d:%d len %d %s", + dbg_mntk(key, "dangling match LEB %d:%d len %d key ", o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, - o_znode->zbranch[o_n].len, DBGKEY(key)); + o_znode->zbranch[o_n].len); *zn = o_znode; *n = o_n; return 1; @@ -1179,7 +1179,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search key %s", DBGKEY(key)); + dbg_tnck(key, "search key "); ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); znode = c->zroot.znode; @@ -1315,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search and dirty key %s", DBGKEY(key)); + dbg_tnck(key, "search and dirty key "); znode = c->zroot.znode; if (unlikely(!znode)) { @@ -1722,8 +1722,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, if (!keys_eq(c, &zbr->key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(&zbr->key), DBGKEY1(&key1)); + dbg_tnck(&zbr->key, "looked for key "); + dbg_tnck(&key1, "found node's key "); goto out_err; } @@ -1776,7 +1776,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) ubifs_err("failed to read from LEB %d:%d, error %d", lnum, offs, err); dbg_dump_stack(); - dbg_tnc("key %s", DBGKEY(&bu->key)); + dbg_tnck(&bu->key, "key "); return err; } @@ -1811,7 +1811,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, int found, n, err; struct ubifs_znode *znode; - dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); mutex_lock(&c->tnc_mutex); found = ubifs_lookup_level0(c, key, &znode, &n); if (!found) { @@ -1985,8 +1985,7 @@ again: zp = znode->parent; if (znode->child_cnt < c->fanout) { ubifs_assert(n != c->fanout); - dbg_tnc("inserted at %d level %d, key %s", n, znode->level, - DBGKEY(key)); + dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); insert_zbranch(znode, zbr, n); @@ -2001,7 +2000,7 @@ again: * Unfortunately, @znode does not have more empty slots and we have to * split it. */ - dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + dbg_tnck(key, "splitting level %d, key ", znode->level); if (znode->alt) /* @@ -2095,7 +2094,7 @@ do_split: } /* Insert new key and branch */ - dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); insert_zbranch(zi, zbr, n); @@ -2171,7 +2170,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (!found) { struct ubifs_zbranch zbr; @@ -2220,8 +2219,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, - old_offs, lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, + old_offs, lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2303,8 +2302,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, - DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", + lnum, offs, nm->len, nm->name); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2397,7 +2396,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) /* Delete without merge for now */ ubifs_assert(znode->level == 0); ubifs_assert(n >= 0 && n < c->fanout); - dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + dbg_tnck(&znode->zbranch[n].key, "deleting key "); zbr = &znode->zbranch[n]; lnc_free(zbr); @@ -2507,7 +2506,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2538,7 +2537,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); + dbg_tnck(key, "%.*s, key ", nm->len, nm->name); err = lookup_level0_dirty(c, key, &znode, &n); if (err < 0) goto out_unlock; @@ -2653,7 +2652,7 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, dbg_dump_znode(c, znode); goto out_unlock; } - dbg_tnc("removing %s", DBGKEY(key)); + dbg_tnck(key, "removing key "); } if (k) { for (i = n + 1 + k; i < znode->child_cnt; i++) @@ -2773,7 +2772,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, struct ubifs_zbranch *zbr; union ubifs_key *dkey; - dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); ubifs_assert(is_hash_key(c, key)); mutex_lock(&c->tnc_mutex); @@ -3332,9 +3331,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, out_dump: block = key_block(c, key); - ubifs_err("inode %lu has size %lld, but there are data at offset %lld " - "(data key %s)", (unsigned long)inode->i_ino, size, - ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); + ubifs_err("inode %lu has size %lld, but there are data at offset %lld", + (unsigned long)inode->i_ino, size, + ((loff_t)block) << UBIFS_BLOCK_SHIFT); mutex_unlock(&c->tnc_mutex); dbg_dump_inode(c, inode); dbg_dump_stack(); diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index b48db999903..dc28fe6ec07 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, case UBIFS_XENT_KEY: break; default: - dbg_msg("bad key type at slot %d: %s", i, - DBGKEY(&zbr->key)); + dbg_msg("bad key type at slot %d: %d", + i, key_type(c, &zbr->key)); err = 3; goto out_dump; } @@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, zbr->offs); if (err) { - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); return err; } @@ -484,8 +484,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (!keys_eq(c, key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(key), DBGKEY1(&key1)); + dbg_tnck(key, "looked for key "); + dbg_tnck(&key1, "but found node's key "); dbg_dump_node(c, node); return -EINVAL; } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e58fa777fa0..f96a5b58a97 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -139,6 +139,20 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) +/** + * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation + * This is a nop so far, because only x86 needs it. + */ +#ifndef __tlb_remove_pmd_tlb_entry +#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) +#endif + +#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ + do { \ + tlb->need_flush = 1; \ + __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ + } while (0) + #define pte_free_tlb(tlb, ptep, address) \ do { \ tlb->need_flush = 1; \ diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 63e4fce6728..4cd4be26722 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -453,7 +453,7 @@ struct drm_encoder_funcs { #define DRM_CONNECTOR_MAX_UMODES 16 #define DRM_CONNECTOR_MAX_PROPERTY 16 #define DRM_CONNECTOR_LEN 32 -#define DRM_CONNECTOR_MAX_ENCODER 2 +#define DRM_CONNECTOR_MAX_ENCODER 3 /** * drm_encoder - central DRM encoder structure diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index 21114810c7c..0101e9c17fa 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -30,6 +30,7 @@ struct dma_chan; * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h + * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h * @dma_filter: function used to select an appropriate RX and TX * DMA channel to be used for DMA, if and only if you're deploying the * generic DMA engine @@ -52,6 +53,7 @@ struct mmci_platform_data { int gpio_cd; bool cd_invert; unsigned long capabilities; + unsigned long capabilities2; bool (*dma_filter)(struct dma_chan *chan, void *filter_param); void *dma_rx_param; void *dma_tx_param; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 5c4abce94ad..b936763f223 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -5,6 +5,7 @@ #include <linux/kexec.h> #include <linux/device.h> #include <linux/proc_fs.h> +#include <linux/elf.h> #define ELFCORE_ADDR_MAX (-1ULL) #define ELFCORE_ADDR_ERR (-2ULL) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 31f73220e7d..d64a55b23af 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -242,6 +242,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index f362733186a..657ab55beda 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -61,6 +61,7 @@ struct file; static inline void eventpoll_init_file(struct file *file) { INIT_LIST_HEAD(&file->f_ep_links); + INIT_LIST_HEAD(&file->f_tfile_llink); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7aacf31418f..4bc8169fb5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -525,6 +525,7 @@ enum positive_aop_returns { struct page; struct address_space; struct writeback_control; +enum migrate_mode; struct iov_iter { const struct iovec *iov; @@ -609,9 +610,12 @@ struct address_space_operations { loff_t offset, unsigned long nr_segs); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); - /* migrate the contents of a page to the specified target */ + /* + * migrate the contents of a page to the specified target. If sync + * is false, it must not block. + */ int (*migratepage) (struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); @@ -656,6 +660,7 @@ struct address_space { * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ +struct request_queue; struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ @@ -678,6 +683,7 @@ struct block_device { unsigned bd_part_count; int bd_invalidated; struct gendisk * bd_disk; + struct request_queue * bd_queue; struct list_head bd_list; /* * Private data. You must have bd_claim'ed the block_device @@ -1001,6 +1007,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; + struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; #ifdef CONFIG_DEBUG_WRITECOUNT @@ -2536,7 +2543,8 @@ extern int generic_check_addressable(unsigned, u64); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, + enum migrate_mode); #else #define buffer_migrate_page NULL #endif diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 4f4462974c1..b148087f49a 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -22,6 +22,8 @@ #define GFS2_LIVE_LOCK 1 #define GFS2_TRANS_LOCK 2 #define GFS2_RENAME_LOCK 3 +#define GFS2_CONTROL_LOCK 4 +#define GFS2_MOUNTED_LOCK 5 /* Format numbers for various metadata types */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a9ace9c3250..1b921299abc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -18,7 +18,7 @@ extern struct page *follow_trans_huge_pmd(struct mm_struct *mm, unsigned int flags); extern int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - pmd_t *pmd); + pmd_t *pmd, unsigned long addr); extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec); diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 114c0f6fc63..78d3465251d 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -652,10 +652,12 @@ struct twl4030_power_data { unsigned num; struct twl4030_resconfig *resource_config; #define TWL4030_RESCONFIG_UNDEF ((u8)-1) + bool use_poweroff; /* Board is wired for TWL poweroff */ }; extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts); extern int twl4030_remove_script(u8 flags); +extern void twl4030_power_off(void); struct twl4030_codec_data { unsigned int digimic_delay; /* in ms */ diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 34e8d52c192..f1362b5447f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -22,7 +22,7 @@ struct inet_diag_sockid { /* Request structure */ -struct inet_diag_req_compat { +struct inet_diag_req { __u8 idiag_family; /* Family of addresses. */ __u8 idiag_src_len; __u8 idiag_dst_len; @@ -34,7 +34,7 @@ struct inet_diag_req_compat { __u32 idiag_dbs; /* Tables to dump (NI) */ }; -struct inet_diag_req { +struct inet_diag_req_v2 { __u8 sdiag_family; __u8 sdiag_protocol; __u8 idiag_ext; @@ -143,12 +143,12 @@ struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, struct nlattr *bc); int (*dump_one)(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req); + struct inet_diag_req_v2 *req); void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, @@ -158,15 +158,15 @@ struct inet_diag_handler { struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req *r, + struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req); + struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d0a7a0c7166..e8343422240 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -185,16 +185,17 @@ static inline void might_fault(void) extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(int state); -NORET_TYPE void panic(const char * fmt, ...) - __attribute__ ((NORET_AND format (printf, 1, 2))) __cold; +__printf(1, 2) +void panic(const char *fmt, ...) + __noreturn __cold; extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); extern int oops_may_print(void); -NORET_TYPE void do_exit(long error_code) - ATTRIB_NORET; -NORET_TYPE void complete_and_exit(struct completion *, long) - ATTRIB_NORET; +void do_exit(long error_code) + __noreturn; +void complete_and_exit(struct completion *, long) + __noreturn; /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index ee0c952188d..fee66317e07 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -18,7 +18,6 @@ enum kmsg_dump_reason { KMSG_DUMP_OOPS, KMSG_DUMP_PANIC, - KMSG_DUMP_KEXEC, KMSG_DUMP_RESTART, KMSG_DUMP_HALT, KMSG_DUMP_POWEROFF, diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 3f46aedea42..807f1e53322 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -88,8 +88,4 @@ #endif -#define NORET_TYPE /**/ -#define ATTRIB_NORET __attribute__((noreturn)) -#define NORET_AND noreturn, - #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f944591765e..4d34356fe64 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,13 +32,11 @@ enum mem_cgroup_page_stat_item { MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ }; -extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, - struct list_head *dst, - unsigned long *scanned, int order, - isolate_mode_t mode, - struct zone *z, - struct mem_cgroup *mem_cont, - int active, int file); +struct mem_cgroup_reclaim_cookie { + struct zone *zone; + int priority; + unsigned int generation; +}; #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* @@ -56,20 +54,21 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t mask, struct mem_cgroup **ptr); + struct page *page, gfp_t mask, struct mem_cgroup **memcgp); extern void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *ptr); -extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); + struct mem_cgroup *memcg); +extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru); -extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru); -extern void mem_cgroup_rotate_reclaimable_page(struct page *page); -extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); -extern void mem_cgroup_del_lru(struct page *page); -extern void mem_cgroup_move_lists(struct page *page, - enum lru_list from, enum lru_list to); + +struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); +struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *, + enum lru_list); +void mem_cgroup_lru_del_list(struct page *, enum lru_list); +void mem_cgroup_lru_del(struct page *); +struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *, + enum lru_list, enum lru_list); /* For coalescing uncharge for reducing memcg' overhead*/ extern void mem_cgroup_uncharge_start(void); @@ -102,10 +101,15 @@ extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); extern int mem_cgroup_prepare_migration(struct page *page, - struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask); + struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask); extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok); +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, + struct mem_cgroup *, + struct mem_cgroup_reclaim_cookie *); +void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); + /* * For memory reclaim. */ @@ -122,7 +126,10 @@ struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); +extern void mem_cgroup_replace_page_cache(struct page *oldpage, + struct page *newpage); +extern void mem_cgroup_reset_owner(struct page *page); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif @@ -157,7 +164,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg); void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail); +void mem_cgroup_split_huge_fixup(struct page *head); #endif #ifdef CONFIG_DEBUG_VM @@ -180,17 +187,17 @@ static inline int mem_cgroup_cache_charge(struct page *page, } static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) + struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp) { return 0; } static inline void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *ptr) + struct mem_cgroup *memcg) { } -static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) +static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) { } @@ -210,33 +217,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline void mem_cgroup_add_lru_list(struct page *page, int lru) -{ -} - -static inline void mem_cgroup_del_lru_list(struct page *page, int lru) +static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, + struct mem_cgroup *memcg) { - return ; + return &zone->lruvec; } -static inline void mem_cgroup_rotate_reclaimable_page(struct page *page) +static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, + struct page *page, + enum lru_list lru) { - return ; + return &zone->lruvec; } -static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru) +static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) { - return ; } -static inline void mem_cgroup_del_lru(struct page *page) +static inline void mem_cgroup_lru_del(struct page *page) { - return ; } -static inline void -mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) +static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, + struct page *page, + enum lru_list from, + enum lru_list to) { + return &zone->lruvec; } static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) @@ -269,7 +276,7 @@ static inline struct cgroup_subsys_state static inline int mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **ptr, gfp_t gfp_mask) + struct mem_cgroup **memcgp, gfp_t gfp_mask) { return 0; } @@ -279,6 +286,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, { } +static inline struct mem_cgroup * +mem_cgroup_iter(struct mem_cgroup *root, + struct mem_cgroup *prev, + struct mem_cgroup_reclaim_cookie *reclaim) +{ + return NULL; +} + +static inline void mem_cgroup_iter_break(struct mem_cgroup *root, + struct mem_cgroup *prev) +{ +} + static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg) { return 0; @@ -360,8 +380,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) return 0; } -static inline void mem_cgroup_split_huge_fixup(struct page *head, - struct page *tail) +static inline void mem_cgroup_split_huge_fixup(struct page *head) { } @@ -369,6 +388,14 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { } +static inline void mem_cgroup_replace_page_cache(struct page *oldpage, + struct page *newpage) +{ +} + +static inline void mem_cgroup_reset_owner(struct page *page) +{ +} #endif /* CONFIG_CGROUP_MEM_CONT */ #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 63b4fb8e3b6..92be3476c9f 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -297,10 +297,11 @@ enum { struct pm860x_chip { struct device *dev; - struct mutex io_lock; struct mutex irq_lock; struct i2c_client *client; struct i2c_client *companion; /* companion chip client */ + struct regmap *regmap; + struct regmap *regmap_companion; int buck3_double; /* DVC ramp slope double */ unsigned short companion_addr; diff --git a/include/linux/mfd/ab5500/ab5500.h b/include/linux/mfd/abx500/ab5500.h index a720051ae93..a720051ae93 100644 --- a/include/linux/mfd/ab5500/ab5500.h +++ b/include/linux/mfd/abx500/ab5500.h diff --git a/include/linux/mfd/ab8500/gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h index 252966769d9..252966769d9 100644 --- a/include/linux/mfd/ab8500/gpadc.h +++ b/include/linux/mfd/abx500/ab8500-gpadc.h diff --git a/include/linux/mfd/ab8500/gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h index 488a8c920a2..488a8c920a2 100644 --- a/include/linux/mfd/ab8500/gpio.h +++ b/include/linux/mfd/abx500/ab8500-gpio.h diff --git a/include/linux/mfd/ab8500/sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h index 10da0291f8f..10da0291f8f 100644 --- a/include/linux/mfd/ab8500/sysctrl.h +++ b/include/linux/mfd/abx500/ab8500-sysctrl.h diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 838c6b487cc..838c6b487cc 100644 --- a/include/linux/mfd/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h index b8e6d944908..15b2392a56f 100644 --- a/include/linux/mfd/max8925.h +++ b/include/linux/mfd/max8925.h @@ -203,6 +203,8 @@ struct max8925_chip { int irq_base; int core_irq; int tsc_irq; + + unsigned int wakeup_flag; }; struct max8925_backlight_pdata { diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 0bbd13dbe33..fff590521e5 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -77,6 +77,82 @@ struct max8997_regulator_data { struct regulator_init_data *initdata; }; +enum max8997_muic_usb_type { + MAX8997_USB_HOST, + MAX8997_USB_DEVICE, +}; + +enum max8997_muic_charger_type { + MAX8997_CHARGER_TYPE_NONE = 0, + MAX8997_CHARGER_TYPE_USB, + MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT, + MAX8997_CHARGER_TYPE_DEDICATED_CHG, + MAX8997_CHARGER_TYPE_500MA, + MAX8997_CHARGER_TYPE_1A, + MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7, +}; + +struct max8997_muic_reg_data { + u8 addr; + u8 data; +}; + +/** + * struct max8997_muic_platform_data + * @usb_callback: callback function for USB + * inform callee of USB type (HOST or DEVICE) + * and attached state(true or false) + * @charger_callback: callback function for charger + * inform callee of charger_type + * and attached state(true or false) + * @deskdock_callback: callback function for desk dock + * inform callee of attached state(true or false) + * @cardock_callback: callback function for car dock + * inform callee of attached state(true or false) + * @mhl_callback: callback function for MHL (Mobile High-definition Link) + * inform callee of attached state(true or false) + * @uart_callback: callback function for JIG UART + * inform callee of attached state(true or false) + * @init_data: array of max8997_muic_reg_data + * used for initializing registers of MAX8997 MUIC device + * @num_init_data: array size of init_data + */ +struct max8997_muic_platform_data { + void (*usb_callback)(enum max8997_muic_usb_type usb_type, + bool attached); + void (*charger_callback)(bool attached, + enum max8997_muic_charger_type charger_type); + void (*deskdock_callback) (bool attached); + void (*cardock_callback) (bool attached); + void (*mhl_callback) (bool attached); + void (*uart_callback) (bool attached); + + struct max8997_muic_reg_data *init_data; + int num_init_data; +}; + +enum max8997_led_mode { + MAX8997_NONE, + MAX8997_FLASH_MODE, + MAX8997_MOVIE_MODE, + MAX8997_FLASH_PIN_CONTROL_MODE, + MAX8997_MOVIE_PIN_CONTROL_MODE, +}; + +/** + * struct max8997_led_platform_data + * The number of LED devices for MAX8997 is two + * @mode: LED mode for each LED device + * @brightness: initial brightness for each LED device + * range: + * [0 - 31]: MAX8997_FLASH_MODE and MAX8997_FLASH_PIN_CONTROL_MODE + * [0 - 15]: MAX8997_MOVIE_MODE and MAX8997_MOVIE_PIN_CONTROL_MODE + */ +struct max8997_led_platform_data { + enum max8997_led_mode mode[2]; + u8 brightness[2]; +}; + struct max8997_platform_data { /* IRQ */ int irq_base; @@ -113,10 +189,13 @@ struct max8997_platform_data { /* charge Full Timeout */ int timeout; /* 0 (no timeout), 5, 6, 7 hours */ - /* MUIC: Not implemented */ + /* ---- MUIC ---- */ + struct max8997_muic_platform_data *muic_pdata; + /* HAPTIC: Not implemented */ /* RTC: Not implemented */ - /* Flash: Not implemented */ + /* ---- LED ---- */ + struct max8997_led_platform_data *led_pdata; }; #endif /* __LINUX_MFD_MAX8998_H */ diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index a98e2a316d1..b86ee45c8b0 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -174,6 +174,9 @@ struct mc13xxx_platform_data { #define MC13XXX_ADC_MODE_MULT_CHAN 3 #define MC13XXX_ADC0 43 +#define MC13XXX_ADC0_LICELLCON (1 << 0) +#define MC13XXX_ADC0_CHRGICON (1 << 1) +#define MC13XXX_ADC0_BATICON (1 << 2) #define MC13XXX_ADC0_ADREFEN (1 << 10) #define MC13XXX_ADC0_TSMOD0 (1 << 12) #define MC13XXX_ADC0_TSMOD1 (1 << 13) @@ -185,4 +188,9 @@ struct mc13xxx_platform_data { MC13XXX_ADC0_TSMOD1 | \ MC13XXX_ADC0_TSMOD2) +#define MC13XXX_ADC0_CONFIG_MASK (MC13XXX_ADC0_TSMOD_MASK | \ + MC13XXX_ADC0_LICELLCON | \ + MC13XXX_ADC0_CHRGICON | \ + MC13XXX_ADC0_BATICON) + #endif /* ifndef __LINUX_MFD_MC13XXX_H */ diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h index ee496708e38..1515e64e366 100644 --- a/include/linux/mfd/mcp.h +++ b/include/linux/mfd/mcp.h @@ -10,6 +10,7 @@ #ifndef MCP_H #define MCP_H +#include <linux/mod_devicetable.h> #include <mach/dma.h> struct mcp_ops; @@ -26,7 +27,7 @@ struct mcp { dma_device_t dma_telco_rd; dma_device_t dma_telco_wr; struct device attached_device; - int gpio_base; + const char *codec; }; struct mcp_ops { @@ -44,10 +45,11 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int); unsigned int mcp_reg_read(struct mcp *, unsigned int); void mcp_enable(struct mcp *); void mcp_disable(struct mcp *); +const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp); #define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) struct mcp *mcp_host_alloc(struct device *, size_t); -int mcp_host_register(struct mcp *); +int mcp_host_register(struct mcp *, void *); void mcp_host_unregister(struct mcp *); struct mcp_driver { @@ -56,6 +58,7 @@ struct mcp_driver { void (*remove)(struct mcp *); int (*suspend)(struct mcp *, pm_message_t); int (*resume)(struct mcp *); + const struct mcp_device_id *id_table; }; int mcp_driver_register(struct mcp_driver *); diff --git a/include/linux/mfd/s5m87xx/s5m-core.h b/include/linux/mfd/s5m87xx/s5m-core.h new file mode 100644 index 00000000000..a7480b57f92 --- /dev/null +++ b/include/linux/mfd/s5m87xx/s5m-core.h @@ -0,0 +1,373 @@ +/* + * s5m-core.h + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MFD_S5M_CORE_H +#define __LINUX_MFD_S5M_CORE_H + +#define NUM_IRQ_REGS 4 + +enum s5m_device_type { + S5M8751X, + S5M8763X, + S5M8767X, +}; + +/* S5M8767 registers */ +enum s5m8767_reg { + S5M8767_REG_ID, + S5M8767_REG_INT1, + S5M8767_REG_INT2, + S5M8767_REG_INT3, + S5M8767_REG_INT1M, + S5M8767_REG_INT2M, + S5M8767_REG_INT3M, + S5M8767_REG_STATUS1, + S5M8767_REG_STATUS2, + S5M8767_REG_STATUS3, + S5M8767_REG_CTRL1, + S5M8767_REG_CTRL2, + S5M8767_REG_LOWBAT1, + S5M8767_REG_LOWBAT2, + S5M8767_REG_BUCHG, + S5M8767_REG_DVSRAMP, + S5M8767_REG_DVSTIMER2 = 0x10, + S5M8767_REG_DVSTIMER3, + S5M8767_REG_DVSTIMER4, + S5M8767_REG_LDO1, + S5M8767_REG_LDO2, + S5M8767_REG_LDO3, + S5M8767_REG_LDO4, + S5M8767_REG_LDO5, + S5M8767_REG_LDO6, + S5M8767_REG_LDO7, + S5M8767_REG_LDO8, + S5M8767_REG_LDO9, + S5M8767_REG_LDO10, + S5M8767_REG_LDO11, + S5M8767_REG_LDO12, + S5M8767_REG_LDO13, + S5M8767_REG_LDO14 = 0x20, + S5M8767_REG_LDO15, + S5M8767_REG_LDO16, + S5M8767_REG_LDO17, + S5M8767_REG_LDO18, + S5M8767_REG_LDO19, + S5M8767_REG_LDO20, + S5M8767_REG_LDO21, + S5M8767_REG_LDO22, + S5M8767_REG_LDO23, + S5M8767_REG_LDO24, + S5M8767_REG_LDO25, + S5M8767_REG_LDO26, + S5M8767_REG_LDO27, + S5M8767_REG_LDO28, + S5M8767_REG_UVLO = 0x31, + S5M8767_REG_BUCK1CTRL1, + S5M8767_REG_BUCK1CTRL2, + S5M8767_REG_BUCK2CTRL, + S5M8767_REG_BUCK2DVS1, + S5M8767_REG_BUCK2DVS2, + S5M8767_REG_BUCK2DVS3, + S5M8767_REG_BUCK2DVS4, + S5M8767_REG_BUCK2DVS5, + S5M8767_REG_BUCK2DVS6, + S5M8767_REG_BUCK2DVS7, + S5M8767_REG_BUCK2DVS8, + S5M8767_REG_BUCK3CTRL, + S5M8767_REG_BUCK3DVS1, + S5M8767_REG_BUCK3DVS2, + S5M8767_REG_BUCK3DVS3, + S5M8767_REG_BUCK3DVS4, + S5M8767_REG_BUCK3DVS5, + S5M8767_REG_BUCK3DVS6, + S5M8767_REG_BUCK3DVS7, + S5M8767_REG_BUCK3DVS8, + S5M8767_REG_BUCK4CTRL, + S5M8767_REG_BUCK4DVS1, + S5M8767_REG_BUCK4DVS2, + S5M8767_REG_BUCK4DVS3, + S5M8767_REG_BUCK4DVS4, + S5M8767_REG_BUCK4DVS5, + S5M8767_REG_BUCK4DVS6, + S5M8767_REG_BUCK4DVS7, + S5M8767_REG_BUCK4DVS8, + S5M8767_REG_BUCK5CTRL1, + S5M8767_REG_BUCK5CTRL2, + S5M8767_REG_BUCK5CTRL3, + S5M8767_REG_BUCK5CTRL4, + S5M8767_REG_BUCK5CTRL5, + S5M8767_REG_BUCK6CTRL1, + S5M8767_REG_BUCK6CTRL2, + S5M8767_REG_BUCK7CTRL1, + S5M8767_REG_BUCK7CTRL2, + S5M8767_REG_BUCK8CTRL1, + S5M8767_REG_BUCK8CTRL2, + S5M8767_REG_BUCK9CTRL1, + S5M8767_REG_BUCK9CTRL2, + S5M8767_REG_LDO1CTRL, + S5M8767_REG_LDO2_1CTRL, + S5M8767_REG_LDO2_2CTRL, + S5M8767_REG_LDO2_3CTRL, + S5M8767_REG_LDO2_4CTRL, + S5M8767_REG_LDO3CTRL, + S5M8767_REG_LDO4CTRL, + S5M8767_REG_LDO5CTRL, + S5M8767_REG_LDO6CTRL, + S5M8767_REG_LDO7CTRL, + S5M8767_REG_LDO8CTRL, + S5M8767_REG_LDO9CTRL, + S5M8767_REG_LDO10CTRL, + S5M8767_REG_LDO11CTRL, + S5M8767_REG_LDO12CTRL, + S5M8767_REG_LDO13CTRL, + S5M8767_REG_LDO14CTRL, + S5M8767_REG_LDO15CTRL, + S5M8767_REG_LDO16CTRL, + S5M8767_REG_LDO17CTRL, + S5M8767_REG_LDO18CTRL, + S5M8767_REG_LDO19CTRL, + S5M8767_REG_LDO20CTRL, + S5M8767_REG_LDO21CTRL, + S5M8767_REG_LDO22CTRL, + S5M8767_REG_LDO23CTRL, + S5M8767_REG_LDO24CTRL, + S5M8767_REG_LDO25CTRL, + S5M8767_REG_LDO26CTRL, + S5M8767_REG_LDO27CTRL, + S5M8767_REG_LDO28CTRL, +}; + +/* S5M8763 registers */ +enum s5m8763_reg { + S5M8763_REG_IRQ1, + S5M8763_REG_IRQ2, + S5M8763_REG_IRQ3, + S5M8763_REG_IRQ4, + S5M8763_REG_IRQM1, + S5M8763_REG_IRQM2, + S5M8763_REG_IRQM3, + S5M8763_REG_IRQM4, + S5M8763_REG_STATUS1, + S5M8763_REG_STATUS2, + S5M8763_REG_STATUSM1, + S5M8763_REG_STATUSM2, + S5M8763_REG_CHGR1, + S5M8763_REG_CHGR2, + S5M8763_REG_LDO_ACTIVE_DISCHARGE1, + S5M8763_REG_LDO_ACTIVE_DISCHARGE2, + S5M8763_REG_BUCK_ACTIVE_DISCHARGE3, + S5M8763_REG_ONOFF1, + S5M8763_REG_ONOFF2, + S5M8763_REG_ONOFF3, + S5M8763_REG_ONOFF4, + S5M8763_REG_BUCK1_VOLTAGE1, + S5M8763_REG_BUCK1_VOLTAGE2, + S5M8763_REG_BUCK1_VOLTAGE3, + S5M8763_REG_BUCK1_VOLTAGE4, + S5M8763_REG_BUCK2_VOLTAGE1, + S5M8763_REG_BUCK2_VOLTAGE2, + S5M8763_REG_BUCK3, + S5M8763_REG_BUCK4, + S5M8763_REG_LDO1_LDO2, + S5M8763_REG_LDO3, + S5M8763_REG_LDO4, + S5M8763_REG_LDO5, + S5M8763_REG_LDO6, + S5M8763_REG_LDO7, + S5M8763_REG_LDO7_LDO8, + S5M8763_REG_LDO9_LDO10, + S5M8763_REG_LDO11, + S5M8763_REG_LDO12, + S5M8763_REG_LDO13, + S5M8763_REG_LDO14, + S5M8763_REG_LDO15, + S5M8763_REG_LDO16, + S5M8763_REG_BKCHR, + S5M8763_REG_LBCNFG1, + S5M8763_REG_LBCNFG2, +}; + +enum s5m8767_irq { + S5M8767_IRQ_PWRR, + S5M8767_IRQ_PWRF, + S5M8767_IRQ_PWR1S, + S5M8767_IRQ_JIGR, + S5M8767_IRQ_JIGF, + S5M8767_IRQ_LOWBAT2, + S5M8767_IRQ_LOWBAT1, + + S5M8767_IRQ_MRB, + S5M8767_IRQ_DVSOK2, + S5M8767_IRQ_DVSOK3, + S5M8767_IRQ_DVSOK4, + + S5M8767_IRQ_RTC60S, + S5M8767_IRQ_RTCA1, + S5M8767_IRQ_RTCA2, + S5M8767_IRQ_SMPL, + S5M8767_IRQ_RTC1S, + S5M8767_IRQ_WTSR, + + S5M8767_IRQ_NR, +}; + +#define S5M8767_IRQ_PWRR_MASK (1 << 0) +#define S5M8767_IRQ_PWRF_MASK (1 << 1) +#define S5M8767_IRQ_PWR1S_MASK (1 << 3) +#define S5M8767_IRQ_JIGR_MASK (1 << 4) +#define S5M8767_IRQ_JIGF_MASK (1 << 5) +#define S5M8767_IRQ_LOWBAT2_MASK (1 << 6) +#define S5M8767_IRQ_LOWBAT1_MASK (1 << 7) + +#define S5M8767_IRQ_MRB_MASK (1 << 2) +#define S5M8767_IRQ_DVSOK2_MASK (1 << 3) +#define S5M8767_IRQ_DVSOK3_MASK (1 << 4) +#define S5M8767_IRQ_DVSOK4_MASK (1 << 5) + +#define S5M8767_IRQ_RTC60S_MASK (1 << 0) +#define S5M8767_IRQ_RTCA1_MASK (1 << 1) +#define S5M8767_IRQ_RTCA2_MASK (1 << 2) +#define S5M8767_IRQ_SMPL_MASK (1 << 3) +#define S5M8767_IRQ_RTC1S_MASK (1 << 4) +#define S5M8767_IRQ_WTSR_MASK (1 << 5) + +enum s5m8763_irq { + S5M8763_IRQ_DCINF, + S5M8763_IRQ_DCINR, + S5M8763_IRQ_JIGF, + S5M8763_IRQ_JIGR, + S5M8763_IRQ_PWRONF, + S5M8763_IRQ_PWRONR, + + S5M8763_IRQ_WTSREVNT, + S5M8763_IRQ_SMPLEVNT, + S5M8763_IRQ_ALARM1, + S5M8763_IRQ_ALARM0, + + S5M8763_IRQ_ONKEY1S, + S5M8763_IRQ_TOPOFFR, + S5M8763_IRQ_DCINOVPR, + S5M8763_IRQ_CHGRSTF, + S5M8763_IRQ_DONER, + S5M8763_IRQ_CHGFAULT, + + S5M8763_IRQ_LOBAT1, + S5M8763_IRQ_LOBAT2, + + S5M8763_IRQ_NR, +}; + +#define S5M8763_IRQ_DCINF_MASK (1 << 2) +#define S5M8763_IRQ_DCINR_MASK (1 << 3) +#define S5M8763_IRQ_JIGF_MASK (1 << 4) +#define S5M8763_IRQ_JIGR_MASK (1 << 5) +#define S5M8763_IRQ_PWRONF_MASK (1 << 6) +#define S5M8763_IRQ_PWRONR_MASK (1 << 7) + +#define S5M8763_IRQ_WTSREVNT_MASK (1 << 0) +#define S5M8763_IRQ_SMPLEVNT_MASK (1 << 1) +#define S5M8763_IRQ_ALARM1_MASK (1 << 2) +#define S5M8763_IRQ_ALARM0_MASK (1 << 3) + +#define S5M8763_IRQ_ONKEY1S_MASK (1 << 0) +#define S5M8763_IRQ_TOPOFFR_MASK (1 << 2) +#define S5M8763_IRQ_DCINOVPR_MASK (1 << 3) +#define S5M8763_IRQ_CHGRSTF_MASK (1 << 4) +#define S5M8763_IRQ_DONER_MASK (1 << 5) +#define S5M8763_IRQ_CHGFAULT_MASK (1 << 7) + +#define S5M8763_IRQ_LOBAT1_MASK (1 << 0) +#define S5M8763_IRQ_LOBAT2_MASK (1 << 1) + +#define S5M8763_ENRAMP (1 << 4) + +/** + * struct s5m87xx_dev - s5m87xx master device for sub-drivers + * @dev: master device of the chip (can be used to access platform data) + * @i2c: i2c client private data for regulator + * @rtc: i2c client private data for rtc + * @iolock: mutex for serializing io access + * @irqlock: mutex for buslock + * @irq_base: base IRQ number for s5m87xx, required for IRQs + * @irq: generic IRQ number for s5m87xx + * @ono: power onoff IRQ number for s5m87xx + * @irq_masks_cur: currently active value + * @irq_masks_cache: cached hardware value + * @type: indicate which s5m87xx "variant" is used + */ +struct s5m87xx_dev { + struct device *dev; + struct regmap *regmap; + struct i2c_client *i2c; + struct i2c_client *rtc; + struct mutex iolock; + struct mutex irqlock; + + int device_type; + int irq_base; + int irq; + int ono; + u8 irq_masks_cur[NUM_IRQ_REGS]; + u8 irq_masks_cache[NUM_IRQ_REGS]; + int type; + bool wakeup; +}; + +int s5m_irq_init(struct s5m87xx_dev *s5m87xx); +void s5m_irq_exit(struct s5m87xx_dev *s5m87xx); +int s5m_irq_resume(struct s5m87xx_dev *s5m87xx); + +extern int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest); +extern int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf); +extern int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value); +extern int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf); +extern int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask); + +struct s5m_platform_data { + struct s5m_regulator_data *regulators; + int device_type; + int num_regulators; + + int irq_base; + int (*cfg_pmic_irq)(void); + + int ono; + bool wakeup; + bool buck_voltage_lock; + + int buck_gpios[3]; + int buck2_voltage[8]; + bool buck2_gpiodvs; + int buck3_voltage[8]; + bool buck3_gpiodvs; + int buck4_voltage[8]; + bool buck4_gpiodvs; + + int buck_set1; + int buck_set2; + int buck_set3; + int buck2_enable; + int buck3_enable; + int buck4_enable; + int buck_default_idx; + int buck2_default_idx; + int buck3_default_idx; + int buck4_default_idx; + + int buck_ramp_delay; + bool buck2_ramp_enable; + bool buck3_ramp_enable; + bool buck4_ramp_enable; +}; + +#endif /* __LINUX_MFD_S5M_CORE_H */ diff --git a/include/linux/mfd/s5m87xx/s5m-pmic.h b/include/linux/mfd/s5m87xx/s5m-pmic.h new file mode 100644 index 00000000000..a72a5d27e62 --- /dev/null +++ b/include/linux/mfd/s5m87xx/s5m-pmic.h @@ -0,0 +1,100 @@ +/* s5m87xx.h + * + * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#ifndef __LINUX_MFD_S5M_PMIC_H +#define __LINUX_MFD_S5M_PMIC_H + +#include <linux/regulator/machine.h> + +/* S5M8767 regulator ids */ +enum s5m8767_regulators { + S5M8767_LDO1, + S5M8767_LDO2, + S5M8767_LDO3, + S5M8767_LDO4, + S5M8767_LDO5, + S5M8767_LDO6, + S5M8767_LDO7, + S5M8767_LDO8, + S5M8767_LDO9, + S5M8767_LDO10, + S5M8767_LDO11, + S5M8767_LDO12, + S5M8767_LDO13, + S5M8767_LDO14, + S5M8767_LDO15, + S5M8767_LDO16, + S5M8767_LDO17, + S5M8767_LDO18, + S5M8767_LDO19, + S5M8767_LDO20, + S5M8767_LDO21, + S5M8767_LDO22, + S5M8767_LDO23, + S5M8767_LDO24, + S5M8767_LDO25, + S5M8767_LDO26, + S5M8767_LDO27, + S5M8767_LDO28, + S5M8767_BUCK1, + S5M8767_BUCK2, + S5M8767_BUCK3, + S5M8767_BUCK4, + S5M8767_BUCK5, + S5M8767_BUCK6, + S5M8767_BUCK7, + S5M8767_BUCK8, + S5M8767_BUCK9, + S5M8767_AP_EN32KHZ, + S5M8767_CP_EN32KHZ, + + S5M8767_REG_MAX, +}; + +/* S5M8763 regulator ids */ +enum s5m8763_regulators { + S5M8763_LDO1, + S5M8763_LDO2, + S5M8763_LDO3, + S5M8763_LDO4, + S5M8763_LDO5, + S5M8763_LDO6, + S5M8763_LDO7, + S5M8763_LDO8, + S5M8763_LDO9, + S5M8763_LDO10, + S5M8763_LDO11, + S5M8763_LDO12, + S5M8763_LDO13, + S5M8763_LDO14, + S5M8763_LDO15, + S5M8763_LDO16, + S5M8763_BUCK1, + S5M8763_BUCK2, + S5M8763_BUCK3, + S5M8763_BUCK4, + S5M8763_AP_EN32KHZ, + S5M8763_CP_EN32KHZ, + S5M8763_ENCHGVI, + S5M8763_ESAFEUSB1, + S5M8763_ESAFEUSB2, +}; + +/** + * s5m87xx_regulator_data - regulator data + * @id: regulator id + * @initdata: regulator init data (contraints, supplies, ...) + */ +struct s5m_regulator_data { + int id; + struct regulator_init_data *initdata; +}; + +#endif /* __LINUX_MFD_S5M_PMIC_H */ diff --git a/include/linux/mfd/s5m87xx/s5m-rtc.h b/include/linux/mfd/s5m87xx/s5m-rtc.h new file mode 100644 index 00000000000..6ce8da264ce --- /dev/null +++ b/include/linux/mfd/s5m87xx/s5m-rtc.h @@ -0,0 +1,84 @@ +/* + * s5m-rtc.h + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MFD_S5M_RTC_H +#define __LINUX_MFD_S5M_RTC_H + +enum s5m87xx_rtc_reg { + S5M87XX_RTC_SEC, + S5M87XX_RTC_MIN, + S5M87XX_RTC_HOUR, + S5M87XX_RTC_WEEKDAY, + S5M87XX_RTC_DATE, + S5M87XX_RTC_MONTH, + S5M87XX_RTC_YEAR1, + S5M87XX_RTC_YEAR2, + S5M87XX_ALARM0_SEC, + S5M87XX_ALARM0_MIN, + S5M87XX_ALARM0_HOUR, + S5M87XX_ALARM0_WEEKDAY, + S5M87XX_ALARM0_DATE, + S5M87XX_ALARM0_MONTH, + S5M87XX_ALARM0_YEAR1, + S5M87XX_ALARM0_YEAR2, + S5M87XX_ALARM1_SEC, + S5M87XX_ALARM1_MIN, + S5M87XX_ALARM1_HOUR, + S5M87XX_ALARM1_WEEKDAY, + S5M87XX_ALARM1_DATE, + S5M87XX_ALARM1_MONTH, + S5M87XX_ALARM1_YEAR1, + S5M87XX_ALARM1_YEAR2, + S5M87XX_ALARM0_CONF, + S5M87XX_ALARM1_CONF, + S5M87XX_RTC_STATUS, + S5M87XX_WTSR_SMPL_CNTL, + S5M87XX_RTC_UDR_CON, +}; + +#define RTC_I2C_ADDR (0x0C >> 1) + +#define HOUR_12 (1 << 7) +#define HOUR_AMPM (1 << 6) +#define HOUR_PM (1 << 5) +#define ALARM0_STATUS (1 << 1) +#define ALARM1_STATUS (1 << 2) +#define UPDATE_AD (1 << 0) + +/* RTC Control Register */ +#define BCD_EN_SHIFT 0 +#define BCD_EN_MASK (1 << BCD_EN_SHIFT) +#define MODEL24_SHIFT 1 +#define MODEL24_MASK (1 << MODEL24_SHIFT) +/* RTC Update Register1 */ +#define RTC_UDR_SHIFT 0 +#define RTC_UDR_MASK (1 << RTC_UDR_SHIFT) +/* RTC Hour register */ +#define HOUR_PM_SHIFT 6 +#define HOUR_PM_MASK (1 << HOUR_PM_SHIFT) +/* RTC Alarm Enable */ +#define ALARM_ENABLE_SHIFT 7 +#define ALARM_ENABLE_MASK (1 << ALARM_ENABLE_SHIFT) + +enum { + RTC_SEC = 0, + RTC_MIN, + RTC_HOUR, + RTC_WEEKDAY, + RTC_DATE, + RTC_MONTH, + RTC_YEAR1, + RTC_YEAR2, +}; + +#endif /* __LINUX_MFD_S5M_RTC_H */ diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index be1af7c42e5..ca1d7a34760 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -20,6 +20,8 @@ enum stmpe_block { }; enum stmpe_partnum { + STMPE610, + STMPE801, STMPE811, STMPE1601, STMPE2401, @@ -50,17 +52,20 @@ enum { struct stmpe_variant_info; +struct stmpe_client_info; /** * struct stmpe - STMPE MFD structure * @lock: lock protecting I/O operations * @irq_lock: IRQ bus lock * @dev: device, mostly for dev_dbg() - * @i2c: i2c client + * @client: client - i2c or spi + * @ci: client specific information * @partnum: part number * @variant: the detected STMPE model number * @regs: list of addresses of registers which are at different addresses on * different variants. Indexed by one of STMPE_IDX_*. + * @irq: irq number for stmpe * @irq_base: starting IRQ number for internal IRQs * @num_gpios: number of gpios, differs for variants * @ier: cache of IER registers for bus_lock @@ -71,11 +76,13 @@ struct stmpe { struct mutex lock; struct mutex irq_lock; struct device *dev; - struct i2c_client *i2c; + void *client; + struct stmpe_client_info *ci; enum stmpe_partnum partnum; struct stmpe_variant_info *variant; const u8 *regs; + int irq; int irq_base; int num_gpios; u8 ier[2]; @@ -183,6 +190,9 @@ struct stmpe_ts_platform_data { * @autosleep_timeout: inactivity timeout in milliseconds for autosleep * @irq_base: base IRQ number. %STMPE_NR_IRQS irqs will be used, or * %STMPE_NR_INTERNAL_IRQS if the GPIO driver is not used. + * @irq_over_gpio: true if gpio is used to get irq + * @irq_gpio: gpio number over which irq will be requested (significant only if + * irq_over_gpio is true) * @gpio: GPIO-specific platform data * @keypad: keypad-specific platform data * @ts: touchscreen-specific platform data @@ -194,6 +204,8 @@ struct stmpe_platform_data { unsigned int irq_trigger; bool irq_invert_polarity; bool autosleep; + bool irq_over_gpio; + int irq_gpio; int autosleep_timeout; struct stmpe_gpio_platform_data *gpio; diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index 4321f044d1e..bc19e5fb7ea 100644 --- a/include/linux/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -104,6 +104,9 @@ #define UCB_MODE_DYN_VFLAG_ENA (1 << 12) #define UCB_MODE_AUD_OFF_CAN (1 << 13) +struct ucb1x00_plat_data { + int gpio_base; +}; struct ucb1x00_irq { void *devid; @@ -116,7 +119,7 @@ struct ucb1x00 { unsigned int irq; struct semaphore adc_sem; spinlock_t io_lock; - u16 id; + const struct mcp_device_id *id; u16 io_dir; u16 io_out; u16 adc_cr; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e39aeecfe9a..eaf867412f7 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -6,18 +6,31 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); +/* + * MIGRATE_ASYNC means never block + * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking + * on most operations but not ->writepage as the potential stall time + * is too significant + * MIGRATE_SYNC will block when migrating pages + */ +enum migrate_mode { + MIGRATE_ASYNC, + MIGRATE_SYNC_LIGHT, + MIGRATE_SYNC, +}; + #ifdef CONFIG_MIGRATION #define PAGE_MIGRATION 1 extern void putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync); + enum migrate_mode mode); extern int migrate_huge_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync); + enum migrate_mode mode); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync) { return -ENOSYS; } + enum migrate_mode mode) { return -ENOSYS; } static inline int migrate_huge_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - bool sync) { return -ENOSYS; } + enum migrate_mode mode) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8f7d24712dc..227fd3e9a9c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -22,26 +22,21 @@ static inline int page_is_file_cache(struct page *page) } static inline void -__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l, - struct list_head *head) +add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru) { - list_add(&page->lru, head); - __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page)); - mem_cgroup_add_lru_list(page, l); -} + struct lruvec *lruvec; -static inline void -add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) -{ - __add_page_to_lru_list(zone, page, l, &zone->lru[l].list); + lruvec = mem_cgroup_lru_add_list(zone, page, lru); + list_add(&page->lru, &lruvec->lists[lru]); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page)); } static inline void -del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) +del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru) { + mem_cgroup_lru_del_list(page, lru); list_del(&page->lru); - __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page)); - mem_cgroup_del_lru_list(page, l); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page)); } /** @@ -59,24 +54,28 @@ static inline enum lru_list page_lru_base_type(struct page *page) return LRU_INACTIVE_ANON; } -static inline void -del_page_from_lru(struct zone *zone, struct page *page) +/** + * page_off_lru - which LRU list was page on? clearing its lru flags. + * @page: the page to test + * + * Returns the LRU list a page was on, as an index into the array of LRU + * lists; and clears its Unevictable or Active flags, ready for freeing. + */ +static inline enum lru_list page_off_lru(struct page *page) { - enum lru_list l; + enum lru_list lru; - list_del(&page->lru); if (PageUnevictable(page)) { __ClearPageUnevictable(page); - l = LRU_UNEVICTABLE; + lru = LRU_UNEVICTABLE; } else { - l = page_lru_base_type(page); + lru = page_lru_base_type(page); if (PageActive(page)) { __ClearPageActive(page); - l += LRU_ACTIVE; + lru += LRU_ACTIVE; } } - __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page)); - mem_cgroup_del_lru_list(page, l); + return lru; } /** @@ -97,7 +96,6 @@ static inline enum lru_list page_lru(struct page *page) if (PageActive(page)) lru += LRU_ACTIVE; } - return lru; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5b42f1b34eb..3cc3062b376 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -151,12 +151,11 @@ struct page { #endif } /* - * If another subsystem starts using the double word pairing for atomic - * operations on struct page then it must change the #if to ensure - * proper alignment of the page struct. + * The struct page can be forced to be double word aligned so that atomic ops + * on double words work. The SLUB allocator can make use of such a feature. */ -#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL) - __attribute__((__aligned__(2*sizeof(unsigned long)))) +#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE + __aligned(2 * sizeof(unsigned long)) #endif ; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index c8ef9bc54d5..9f22ba572de 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -71,6 +71,8 @@ struct mmc_ext_csd { bool hpi_en; /* HPI enablebit */ bool hpi; /* HPI support bit */ unsigned int hpi_cmd; /* cmd used as HPI */ + unsigned int boot_ro_lock; /* ro lock support */ + bool boot_ro_lockable; u8 raw_partition_support; /* 160 */ u8 raw_erased_mem_count; /* 181 */ u8 raw_ext_csd_structure; /* 194 */ @@ -110,6 +112,7 @@ struct sd_ssr { struct sd_switch_caps { unsigned int hs_max_dtr; unsigned int uhs_max_dtr; +#define HIGH_SPEED_MAX_DTR 50000000 #define UHS_SDR104_MAX_DTR 208000000 #define UHS_SDR50_MAX_DTR 100000000 #define UHS_DDR50_MAX_DTR 50000000 @@ -117,11 +120,13 @@ struct sd_switch_caps { #define UHS_SDR12_MAX_DTR 25000000 unsigned int sd3_bus_mode; #define UHS_SDR12_BUS_SPEED 0 +#define HIGH_SPEED_BUS_SPEED 1 #define UHS_SDR25_BUS_SPEED 1 #define UHS_SDR50_BUS_SPEED 2 #define UHS_SDR104_BUS_SPEED 3 #define UHS_DDR50_BUS_SPEED 4 +#define SD_MODE_HIGH_SPEED (1 << HIGH_SPEED_BUS_SPEED) #define SD_MODE_UHS_SDR12 (1 << UHS_SDR12_BUS_SPEED) #define SD_MODE_UHS_SDR25 (1 << UHS_SDR25_BUS_SPEED) #define SD_MODE_UHS_SDR50 (1 << UHS_SDR50_BUS_SPEED) @@ -184,6 +189,10 @@ struct mmc_part { unsigned int part_cfg; /* partition type */ char name[MAX_MMC_PART_NAME_LEN]; bool force_ro; /* to make boot parts RO by default */ + unsigned int area_type; +#define MMC_BLK_DATA_AREA_MAIN (1<<0) +#define MMC_BLK_DATA_AREA_BOOT (1<<1) +#define MMC_BLK_DATA_AREA_GP (1<<2) }; /* @@ -206,6 +215,8 @@ struct mmc_card { #define MMC_STATE_HIGHSPEED_DDR (1<<4) /* card is in high speed mode */ #define MMC_STATE_ULTRAHIGHSPEED (1<<5) /* card is in ultra high speed mode */ #define MMC_CARD_SDXC (1<<6) /* card is SDXC */ +#define MMC_CARD_REMOVED (1<<7) /* card has been removed */ +#define MMC_STATE_HIGHSPEED_200 (1<<8) /* card is in HS200 mode */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -261,12 +272,14 @@ struct mmc_card { * This function fill contents in mmc_part. */ static inline void mmc_part_add(struct mmc_card *card, unsigned int size, - unsigned int part_cfg, char *name, int idx, bool ro) + unsigned int part_cfg, char *name, int idx, bool ro, + int area_type) { card->part[card->nr_parts].size = size; card->part[card->nr_parts].part_cfg = part_cfg; sprintf(card->part[card->nr_parts].name, name, idx); card->part[card->nr_parts].force_ro = ro; + card->part[card->nr_parts].area_type = area_type; card->nr_parts++; } @@ -362,18 +375,24 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) +#define mmc_card_hs200(c) ((c)->state & MMC_STATE_HIGHSPEED_200) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) #define mmc_card_ddr_mode(c) ((c)->state & MMC_STATE_HIGHSPEED_DDR) -#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) +#define mmc_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) +#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) +#define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) +#define mmc_card_set_hs200(c) ((c)->state |= MMC_STATE_HIGHSPEED_200) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR) +#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) +#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) /* * Quirk add/remove for MMC products. diff --git a/include/linux/mmc/cd-gpio.h b/include/linux/mmc/cd-gpio.h new file mode 100644 index 00000000000..a8e46978331 --- /dev/null +++ b/include/linux/mmc/cd-gpio.h @@ -0,0 +1,19 @@ +/* + * Generic GPIO card-detect helper header + * + * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef MMC_CD_GPIO_H +#define MMC_CD_GPIO_H + +struct mmc_host; +int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio, + unsigned int irq, unsigned long flags); +void mmc_cd_gpio_free(struct mmc_host *host); + +#endif diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 174a844a5dd..87a976cc565 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -180,6 +180,8 @@ extern int mmc_try_claim_host(struct mmc_host *host); extern int mmc_flush_cache(struct mmc_card *); +extern int mmc_detect_card_removed(struct mmc_host *host); + /** * mmc_claim_host - exclusively claim a host * @host: mmc host to claim diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 6dc9b80568a..e8779c6d175 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -214,6 +214,7 @@ struct dw_mci_board { unsigned int bus_hz; /* Bus speed */ unsigned int caps; /* Capabilities */ + unsigned int caps2; /* More capabilities */ /* * Override fifo depth. If 0, autodetect it from the FIFOTH register, * but note that this may not be reliable after a bootloader has used diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8ef7894a48d..0beba1e5e1e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -56,10 +56,13 @@ struct mmc_ios { #define MMC_TIMING_UHS_SDR50 3 #define MMC_TIMING_UHS_SDR104 4 #define MMC_TIMING_UHS_DDR50 5 +#define MMC_TIMING_MMC_HS200 6 #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 #define MMC_1_8V_DDR_MODE 2 +#define MMC_1_2V_SDR_MODE 3 +#define MMC_1_8V_SDR_MODE 4 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -148,7 +151,9 @@ struct mmc_host_ops { void (*init_card)(struct mmc_host *host, struct mmc_card *card); int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); - int (*execute_tuning)(struct mmc_host *host); + + /* The tuning command opcode value is different for SD and eMMC cards */ + int (*execute_tuning)(struct mmc_host *host, u32 opcode); void (*enable_preset_value)(struct mmc_host *host, bool enable); int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); @@ -167,6 +172,11 @@ struct mmc_async_req { int (*err_check) (struct mmc_card *, struct mmc_async_req *); }; +struct mmc_hotplug { + unsigned int irq; + void *handler_priv; +}; + struct mmc_host { struct device *parent; struct device class_dev; @@ -242,6 +252,11 @@ struct mmc_host { #define MMC_CAP2_CACHE_CTRL (1 << 1) /* Allow cache control */ #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2) /* Notify poweroff supported */ #define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ +#define MMC_CAP2_NO_SLEEP_CMD (1 << 4) /* Don't allow sleep command */ +#define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ +#define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ +#define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ + MMC_CAP2_HS200_1_2V_SDR) mmc_pm_flag_t pm_caps; /* supported pm features */ unsigned int power_notify_type; @@ -253,10 +268,12 @@ struct mmc_host { int clk_requests; /* internal reference counter */ unsigned int clk_delay; /* number of MCI clk hold cycles */ bool clk_gated; /* clock gated */ - struct work_struct clk_gate_work; /* delayed clock gate */ + struct delayed_work clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ struct mutex clk_gate_mutex; /* mutex for clock gating */ + struct device_attribute clkgate_delay_attr; + unsigned long clkgate_delay; #endif /* host specific block data */ @@ -297,6 +314,8 @@ struct mmc_host { int claim_cnt; /* "claim" nesting count */ struct delayed_work detect; + int detect_change; /* card detect flag */ + struct mmc_hotplug hotplug; const struct mmc_bus_ops *bus_ops; /* current bus driver */ unsigned int bus_refs; /* reference counter */ @@ -323,6 +342,8 @@ struct mmc_host { struct fault_attr fail_mmc_request; #endif + unsigned int actual_clock; /* Actual HC clock rate */ + unsigned long private[0] ____cacheline_aligned; }; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 0e7135697d1..fb9f6e116e1 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -51,6 +51,7 @@ #define MMC_READ_SINGLE_BLOCK 17 /* adtc [31:0] data addr R1 */ #define MMC_READ_MULTIPLE_BLOCK 18 /* adtc [31:0] data addr R1 */ #define MMC_SEND_TUNING_BLOCK 19 /* adtc R1 */ +#define MMC_SEND_TUNING_BLOCK_HS200 21 /* adtc R1 */ /* class 3 */ #define MMC_WRITE_DAT_UNTIL_STOP 20 /* adtc [31:0] data addr R1 */ @@ -280,6 +281,7 @@ struct _mmc_csd { #define EXT_CSD_RST_N_FUNCTION 162 /* R/W */ #define EXT_CSD_SANITIZE_START 165 /* W */ #define EXT_CSD_WR_REL_PARAM 166 /* RO */ +#define EXT_CSD_BOOT_WP 173 /* R/W */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ #define EXT_CSD_PART_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ @@ -321,6 +323,11 @@ struct _mmc_csd { #define EXT_CSD_WR_REL_PARAM_EN (1<<2) +#define EXT_CSD_BOOT_WP_B_PWR_WP_DIS (0x40) +#define EXT_CSD_BOOT_WP_B_PERM_WP_DIS (0x10) +#define EXT_CSD_BOOT_WP_B_PERM_WP_EN (0x04) +#define EXT_CSD_BOOT_WP_B_PWR_WP_EN (0x01) + #define EXT_CSD_PART_CONFIG_ACC_MASK (0x7) #define EXT_CSD_PART_CONFIG_ACC_BOOT0 (0x1) #define EXT_CSD_PART_CONFIG_ACC_GP0 (0x4) @@ -333,13 +340,76 @@ struct _mmc_csd { #define EXT_CSD_CARD_TYPE_26 (1<<0) /* Card can run at 26MHz */ #define EXT_CSD_CARD_TYPE_52 (1<<1) /* Card can run at 52MHz */ -#define EXT_CSD_CARD_TYPE_MASK 0xF /* Mask out reserved bits */ +#define EXT_CSD_CARD_TYPE_MASK 0x3F /* Mask out reserved bits */ #define EXT_CSD_CARD_TYPE_DDR_1_8V (1<<2) /* Card can run at 52MHz */ /* DDR mode @1.8V or 3V I/O */ #define EXT_CSD_CARD_TYPE_DDR_1_2V (1<<3) /* Card can run at 52MHz */ /* DDR mode @1.2V I/O */ #define EXT_CSD_CARD_TYPE_DDR_52 (EXT_CSD_CARD_TYPE_DDR_1_8V \ | EXT_CSD_CARD_TYPE_DDR_1_2V) +#define EXT_CSD_CARD_TYPE_SDR_1_8V (1<<4) /* Card can run at 200MHz */ +#define EXT_CSD_CARD_TYPE_SDR_1_2V (1<<5) /* Card can run at 200MHz */ + /* SDR mode @1.2V I/O */ + +#define EXT_CSD_CARD_TYPE_SDR_200 (EXT_CSD_CARD_TYPE_SDR_1_8V | \ + EXT_CSD_CARD_TYPE_SDR_1_2V) + +#define EXT_CSD_CARD_TYPE_SDR_ALL (EXT_CSD_CARD_TYPE_SDR_200 | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_2V_ALL (EXT_CSD_CARD_TYPE_SDR_1_2V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_8V_ALL (EXT_CSD_CARD_TYPE_SDR_1_8V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V (EXT_CSD_CARD_TYPE_SDR_1_2V | \ + EXT_CSD_CARD_TYPE_DDR_1_8V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V (EXT_CSD_CARD_TYPE_SDR_1_8V | \ + EXT_CSD_CARD_TYPE_DDR_1_8V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V (EXT_CSD_CARD_TYPE_SDR_1_2V | \ + EXT_CSD_CARD_TYPE_DDR_1_2V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V (EXT_CSD_CARD_TYPE_SDR_1_8V | \ + EXT_CSD_CARD_TYPE_DDR_1_2V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52 (EXT_CSD_CARD_TYPE_SDR_1_2V | \ + EXT_CSD_CARD_TYPE_DDR_52 | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52 (EXT_CSD_CARD_TYPE_SDR_1_8V | \ + EXT_CSD_CARD_TYPE_DDR_52 | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V (EXT_CSD_CARD_TYPE_SDR_200 | \ + EXT_CSD_CARD_TYPE_DDR_1_8V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V (EXT_CSD_CARD_TYPE_SDR_200 | \ + EXT_CSD_CARD_TYPE_DDR_1_2V | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) + +#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52 (EXT_CSD_CARD_TYPE_SDR_200 | \ + EXT_CSD_CARD_TYPE_DDR_52 | \ + EXT_CSD_CARD_TYPE_52 | \ + EXT_CSD_CARD_TYPE_26) #define EXT_CSD_BUS_WIDTH_1 0 /* Card is in 1 bit mode */ #define EXT_CSD_BUS_WIDTH_4 1 /* Card is in 4 bit mode */ diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h new file mode 100644 index 00000000000..8959604a13d --- /dev/null +++ b/include/linux/mmc/sdhci-pci-data.h @@ -0,0 +1,18 @@ +#ifndef LINUX_MMC_SDHCI_PCI_DATA_H +#define LINUX_MMC_SDHCI_PCI_DATA_H + +struct pci_dev; + +struct sdhci_pci_data { + struct pci_dev *pdev; + int slotno; + int rst_n_gpio; /* Set to -EINVAL if unused */ + int cd_gpio; /* Set to -EINVAL if unused */ + int (*setup)(struct sdhci_pci_data *data); + void (*cleanup)(struct sdhci_pci_data *data); +}; + +extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, + int slotno); + +#endif diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index e4b69353678..c750f85177d 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -90,8 +90,6 @@ struct sdhci_host { unsigned int quirks2; /* More deviations from spec. */ -#define SDHCI_QUIRK2_OWN_CARD_DETECTION (1<<0) - int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ @@ -121,6 +119,7 @@ struct sdhci_host { #define SDHCI_AUTO_CMD23 (1<<7) /* Auto CMD23 support */ #define SDHCI_PV_ENABLED (1<<8) /* Preset value enabled */ #define SDHCI_SDIO_IRQ_ENABLED (1<<9) /* SDIO irq enabled */ +#define SDHCI_HS200_NEEDS_TUNING (1<<10) /* HS200 needs tuning */ unsigned int version; /* SDHCI spec. version */ diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index e0b1123497b..c9fe66c58f8 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,7 @@ * [8:0] Byte/block count */ +#define R4_18V_PRESENT (1<<24) #define R4_MEMORY_PRESENT (1 << 27) /* @@ -85,6 +86,7 @@ #define SDIO_SD_REV_1_01 0 /* SD Physical Spec Version 1.01 */ #define SDIO_SD_REV_1_10 1 /* SD Physical Spec Version 1.10 */ #define SDIO_SD_REV_2_00 2 /* SD Physical Spec Version 2.00 */ +#define SDIO_SD_REV_3_00 3 /* SD Physical Spev Version 3.00 */ #define SDIO_CCCR_IOEx 0x02 #define SDIO_CCCR_IORx 0x03 @@ -134,8 +136,31 @@ #define SDIO_CCCR_SPEED 0x13 #define SDIO_SPEED_SHS 0x01 /* Supports High-Speed mode */ -#define SDIO_SPEED_EHS 0x02 /* Enable High-Speed mode */ - +#define SDIO_SPEED_BSS_SHIFT 1 +#define SDIO_SPEED_BSS_MASK (7<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_SDR12 (0<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_SDR25 (1<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_SDR50 (2<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_SDR104 (3<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_DDR50 (4<<SDIO_SPEED_BSS_SHIFT) +#define SDIO_SPEED_EHS SDIO_SPEED_SDR25 /* Enable High-Speed */ + +#define SDIO_CCCR_UHS 0x14 +#define SDIO_UHS_SDR50 0x01 +#define SDIO_UHS_SDR104 0x02 +#define SDIO_UHS_DDR50 0x04 + +#define SDIO_CCCR_DRIVE_STRENGTH 0x15 +#define SDIO_SDTx_MASK 0x07 +#define SDIO_DRIVE_SDTA (1<<0) +#define SDIO_DRIVE_SDTC (1<<1) +#define SDIO_DRIVE_SDTD (1<<2) +#define SDIO_DRIVE_DTSx_MASK 0x03 +#define SDIO_DRIVE_DTSx_SHIFT 4 +#define SDIO_DTSx_SET_TYPE_B (0 << SDIO_DRIVE_DTSx_SHIFT) +#define SDIO_DTSx_SET_TYPE_A (1 << SDIO_DRIVE_DTSx_SHIFT) +#define SDIO_DTSx_SET_TYPE_C (2 << SDIO_DRIVE_DTSx_SHIFT) +#define SDIO_DTSx_SET_TYPE_D (3 << SDIO_DRIVE_DTSx_SHIFT) /* * Function Basic Registers (FBR) */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ca6ca92418a..650ba2fb330 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -140,25 +140,29 @@ enum lru_list { NR_LRU_LISTS }; -#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) +#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) -#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++) +#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) -static inline int is_file_lru(enum lru_list l) +static inline int is_file_lru(enum lru_list lru) { - return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); + return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); } -static inline int is_active_lru(enum lru_list l) +static inline int is_active_lru(enum lru_list lru) { - return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); + return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } -static inline int is_unevictable_lru(enum lru_list l) +static inline int is_unevictable_lru(enum lru_list lru) { - return (l == LRU_UNEVICTABLE); + return (lru == LRU_UNEVICTABLE); } +struct lruvec { + struct list_head lists[NR_LRU_LISTS]; +}; + /* Mask used at gathering information at once (see memcontrol.c) */ #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE)) #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) @@ -173,6 +177,8 @@ static inline int is_unevictable_lru(enum lru_list l) #define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) /* Isolate unmapped file */ #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) +/* Isolate for asynchronous migration */ +#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) /* LRU Isolation modes. */ typedef unsigned __bitwise__ isolate_mode_t; @@ -364,10 +370,8 @@ struct zone { ZONE_PADDING(_pad1_) /* Fields commonly accessed by the page reclaim scanner */ - spinlock_t lru_lock; - struct zone_lru { - struct list_head list; - } lru[NR_LRU_LISTS]; + spinlock_t lru_lock; + struct lruvec lruvec; struct zone_reclaim_stat reclaim_stat; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 83ac0713ed0..b29e7f6f8fa 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -436,6 +436,17 @@ struct spi_device_id { __attribute__((aligned(sizeof(kernel_ulong_t)))); }; +/* mcp */ + +#define MCP_NAME_SIZE 20 +#define MCP_MODULE_PREFIX "mcp:" + +struct mcp_device_id { + char name[MCP_NAME_SIZE]; + kernel_ulong_t driver_data /* Data private to the driver */ + __attribute__((aligned(sizeof(kernel_ulong_t)))); +}; + /* dmi */ enum dmi_field { DMI_NONE, diff --git a/include/linux/oom.h b/include/linux/oom.h index 6f9d04a8533..552fba9c7d5 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -43,7 +43,7 @@ enum oom_constraint { extern void compare_swap_oom_score_adj(int old_val, int new_val); extern int test_set_oom_score_adj(int new_val); -extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, +extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 961ecc7d30b..a2d11771c84 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -10,8 +10,6 @@ enum { /* flags for mem_cgroup and file and I/O status */ PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ PCG_FILE_MAPPED, /* page is accounted as "mapped" */ - /* No lock in page_cgroup */ - PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ __NR_PCG_FLAGS, }; @@ -31,7 +29,6 @@ enum { struct page_cgroup { unsigned long flags; struct mem_cgroup *mem_cgroup; - struct list_head lru; /* per cgroup LRU list */ }; void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); @@ -76,12 +73,6 @@ TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) SETPCGFLAG(Used, USED) -SETPCGFLAG(AcctLRU, ACCT_LRU) -CLEARPCGFLAG(AcctLRU, ACCT_LRU) -TESTPCGFLAG(AcctLRU, ACCT_LRU) -TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) - - SETPCGFLAG(FileMapped, FILE_MAPPED) CLEARPCGFLAG(FileMapped, FILE_MAPPED) TESTPCGFLAG(FileMapped, FILE_MAPPED) @@ -122,39 +113,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc, local_irq_restore(*flags); } -#ifdef CONFIG_SPARSEMEM -#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT -#else -#define PCG_ARRAYID_WIDTH NODES_SHIFT -#endif - -#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS) -#error Not enough space left in pc->flags to store page_cgroup array IDs -#endif - -/* pc->flags: ARRAY-ID | FLAGS */ - -#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1) - -#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH) -/* - * Zero the shift count for non-existent fields, to prevent compiler - * warnings and ensure references are optimized away. - */ -#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0)) - -static inline void set_page_cgroup_array_id(struct page_cgroup *pc, - unsigned long id) -{ - pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT); - pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT; -} - -static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc) -{ - return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK; -} - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; @@ -183,7 +141,7 @@ static inline void __init page_cgroup_init_flatmem(void) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); -extern unsigned short lookup_swap_cgroup(swp_entry_t ent); +extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); #else @@ -195,7 +153,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } static inline -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { return 0; } diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index ed17024d2eb..2aa12b8499c 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -21,8 +21,7 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); -void pagevec_strip(struct pagevec *pvec); +void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, @@ -59,7 +58,6 @@ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page) return pagevec_space(pvec); } - static inline void pagevec_release(struct pagevec *pvec) { if (pagevec_count(pvec)) @@ -68,22 +66,22 @@ static inline void pagevec_release(struct pagevec *pvec) static inline void __pagevec_lru_add_anon(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON); + __pagevec_lru_add(pvec, LRU_INACTIVE_ANON); } static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON); + __pagevec_lru_add(pvec, LRU_ACTIVE_ANON); } static inline void __pagevec_lru_add_file(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE); + __pagevec_lru_add(pvec, LRU_INACTIVE_FILE); } static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); + __pagevec_lru_add(pvec, LRU_ACTIVE_FILE); } static inline void pagevec_lru_add_file(struct pagevec *pvec) diff --git a/include/linux/phy.h b/include/linux/phy.h index 79f337c4738..c599f7eca1e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -129,7 +129,12 @@ struct mii_bus { }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) -struct mii_bus *mdiobus_alloc(void); +struct mii_bus *mdiobus_alloc_size(size_t); +static inline struct mii_bus *mdiobus_alloc(void) +{ + return mdiobus_alloc_size(0); +} + int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 8f1b928f777..0d5b79365d0 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -162,10 +162,30 @@ struct tc_sfq_qopt { unsigned flows; /* Maximal number of flows */ }; +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + struct tc_sfq_qopt_v1 { struct tc_sfq_qopt v0; unsigned int depth; /* max number of packets per flow */ unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats */ + struct tc_sfqred_stats stats; }; diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a3baeb2c216..7ddc7f1b480 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -102,4 +102,16 @@ #define PR_MCE_KILL_GET 34 +/* + * Tune up process memory map specifics. + */ +#define PR_SET_MM 35 +# define PR_SET_MM_START_CODE 1 +# define PR_SET_MM_END_CODE 2 +# define PR_SET_MM_START_DATA 3 +# define PR_SET_MM_END_DATA 4 +# define PR_SET_MM_START_STACK 5 +# define PR_SET_MM_START_BRK 6 +# define PR_SET_MM_BRK 7 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9d4539c52e5..07e360b1b28 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -49,9 +49,6 @@ #define RADIX_TREE_EXCEPTIONAL_ENTRY 2 #define RADIX_TREE_EXCEPTIONAL_SHIFT 2 -#define radix_tree_indirect_to_ptr(ptr) \ - radix_tree_indirect_to_ptr((void __force *)(ptr)) - static inline int radix_tree_is_indirect_ptr(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1afb9954bbf..1cdd62a2788 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -158,7 +158,7 @@ static inline void page_dup_rmap(struct page *page) * Called from mm/vmscan.c to handle paging out */ int page_referenced(struct page *, int is_locked, - struct mem_cgroup *cnt, unsigned long *vm_flags); + struct mem_cgroup *memcg, unsigned long *vm_flags); int page_referenced_one(struct page *, struct vm_area_struct *, unsigned long address, unsigned int *mapcount, unsigned long *vm_flags); @@ -236,7 +236,7 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *, #define anon_vma_link(vma) do {} while (0) static inline int page_referenced(struct page *page, int is_locked, - struct mem_cgroup *cnt, + struct mem_cgroup *memcg, unsigned long *vm_flags) { *vm_flags = 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 21cd0303af5..4032ec1cf83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2275,7 +2275,7 @@ extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern void flush_itimer_signals(void); -extern NORET_TYPE void do_group_exit(int); +extern void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 8620f79658d..dfa900948af 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); -void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, +void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); @@ -118,7 +118,6 @@ void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); void svc_close_xprt(struct svc_xprt *xprt); -void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 85c50b40759..c84e9741cb2 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock { /* * Function prototypes. */ -void svc_close_all(struct list_head *); +void svc_close_all(struct svc_serv *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5e2e9845849..ea9231f4935 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -127,7 +127,7 @@ struct hci_dev { __u8 major_class; __u8 minor_class; __u8 features[8]; - __u8 extfeatures[8]; + __u8 host_features[8]; __u8 commands[64]; __u8 ssp_mode; __u8 hci_ver; @@ -676,7 +676,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_le_capable(dev) ((dev)->features[4] & LMP_LE) /* ----- Extended LMP capabilities ----- */ -#define lmp_host_le_capable(dev) ((dev)->extfeatures[0] & LMP_HOST_LE) +#define lmp_host_le_capable(dev) ((dev)->host_features[0] & LMP_HOST_LE) /* ----- HCI protocols ----- */ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, diff --git a/include/net/red.h b/include/net/red.h index baab385a473..28068ec614b 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -199,7 +199,8 @@ static inline void red_set_parms(struct red_parms *p, p->Scell_log = Scell_log; p->Scell_max = (255 << Scell_log); - memcpy(p->Stab, stab, sizeof(p->Stab)); + if (stab) + memcpy(p->Stab, stab, sizeof(p->Stab)); } static inline int red_is_idling(const struct red_vars *v) diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index edc4b3d25a2..f64560e204b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -266,9 +266,10 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), TP_STRUCT__entry( __field(int, order) @@ -279,6 +280,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) __field(isolate_mode_t, isolate_mode) + __field(int, file) ), TP_fast_assign( @@ -290,9 +292,10 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_lumpy_dirty = nr_lumpy_dirty; __entry->nr_lumpy_failed = nr_lumpy_failed; __entry->isolate_mode = isolate_mode; + __entry->file = file; ), - TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu", + TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", __entry->isolate_mode, __entry->order, __entry->nr_requested, @@ -300,7 +303,8 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_taken, __entry->nr_lumpy_taken, __entry->nr_lumpy_dirty, - __entry->nr_lumpy_failed) + __entry->nr_lumpy_failed, + __entry->file) ); DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, @@ -312,9 +316,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) ); @@ -327,9 +332,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) ); diff --git a/init/Kconfig b/init/Kconfig index 018d206c21f..6ac2236244c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -783,6 +783,17 @@ config DEBUG_BLK_CGROUP endif # CGROUPS +config CHECKPOINT_RESTORE + bool "Checkpoint/restore support" if EXPERT + default n + help + Enables additional kernel features in a sake of checkpoint/restore. + In particular it adds auxiliary prctl codes to setup process text, + data and heap segment sizes, and a few additional /proc filesystem + entries. + + If unsure, say N here. + menuconfig NAMESPACES bool "Namespaces support" if EXPERT default !EXPERT diff --git a/kernel/exit.c b/kernel/exit.c index 94ed6e20bb5..c44738267be 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -887,7 +887,7 @@ static void check_stack_usage(void) static inline void check_stack_usage(void) {} #endif -NORET_TYPE void do_exit(long code) +void do_exit(long code) { struct task_struct *tsk = current; int group_dead; @@ -1051,7 +1051,7 @@ NORET_TYPE void do_exit(long code) EXPORT_SYMBOL_GPL(do_exit); -NORET_TYPE void complete_and_exit(struct completion *comp, long code) +void complete_and_exit(struct completion *comp, long code) { if (comp) complete(comp); @@ -1070,7 +1070,7 @@ SYSCALL_DEFINE1(exit, int, error_code) * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). */ -NORET_TYPE void +void do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; diff --git a/kernel/kexec.c b/kernel/kexec.c index 090ee10d960..7b088678670 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -32,7 +32,6 @@ #include <linux/console.h> #include <linux/vmalloc.h> #include <linux/swap.h> -#include <linux/kmsg_dump.h> #include <linux/syscore_ops.h> #include <asm/page.h> @@ -1094,8 +1093,6 @@ void crash_kexec(struct pt_regs *regs) if (kexec_crash_image) { struct pt_regs fixed_regs; - kmsg_dump(KMSG_DUMP_KEXEC); - crash_setup_regs(&fixed_regs, regs); crash_save_vmcoreinfo(); machine_crash_shutdown(&fixed_regs); @@ -1132,6 +1129,8 @@ int crash_shrink_memory(unsigned long new_size) { int ret = 0; unsigned long start, end; + unsigned long old_size; + struct resource *ram_res; mutex_lock(&kexec_mutex); @@ -1141,11 +1140,15 @@ int crash_shrink_memory(unsigned long new_size) } start = crashk_res.start; end = crashk_res.end; + old_size = (end == 0) ? 0 : end - start + 1; + if (new_size >= old_size) { + ret = (new_size == old_size) ? 0 : -EINVAL; + goto unlock; + } - if (new_size >= end - start + 1) { - ret = -EINVAL; - if (new_size == end - start + 1) - ret = 0; + ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); + if (!ram_res) { + ret = -ENOMEM; goto unlock; } @@ -1157,7 +1160,15 @@ int crash_shrink_memory(unsigned long new_size) if ((start == end) && (crashk_res.parent != NULL)) release_resource(&crashk_res); + + ram_res->start = end; + ram_res->end = crashk_res.end; + ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + ram_res->name = "System RAM"; + crashk_res.end = end - 1; + + insert_resource(&iomem_resource, ram_res); crash_unmap_reserved_pages(); unlock: diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e5d84644823..95dd7212e61 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2198,7 +2198,7 @@ static ssize_t write_enabled_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { char buf[32]; - int buf_size; + size_t buf_size; buf_size = min(count, (sizeof(buf)-1)); if (copy_from_user(buf, user_buf, buf_size)) diff --git a/kernel/panic.c b/kernel/panic.c index 3458469eb7c..80aed44e345 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -49,6 +49,15 @@ static long no_blink(int state) long (*panic_blink)(int state); EXPORT_SYMBOL(panic_blink); +/* + * Stop ourself in panic -- architecture code may override this + */ +void __weak panic_smp_self_stop(void) +{ + while (1) + cpu_relax(); +} + /** * panic - halt the system * @fmt: The text string to print @@ -57,8 +66,9 @@ EXPORT_SYMBOL(panic_blink); * * This function never returns. */ -NORET_TYPE void panic(const char * fmt, ...) +void panic(const char *fmt, ...) { + static DEFINE_SPINLOCK(panic_lock); static char buf[1024]; va_list args; long i, i_next = 0; @@ -68,8 +78,14 @@ NORET_TYPE void panic(const char * fmt, ...) * It's possible to come here directly from a panic-assertion and * not have preempt disabled. Some functions called from here want * preempt to be disabled. No point enabling it later though... + * + * Only one CPU is allowed to execute the panic code from here. For + * multiple parallel invocations of panic, all other CPUs either + * stop themself or will wait until they are stopped by the 1st CPU + * with smp_send_stop(). */ - preempt_disable(); + if (!spin_trylock(&panic_lock)) + panic_smp_self_stop(); console_verbose(); bust_spinlocks(1); @@ -78,7 +94,11 @@ NORET_TYPE void panic(const char * fmt, ...) va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); #ifdef CONFIG_DEBUG_BUGVERBOSE - dump_stack(); + /* + * Avoid nested stack-dumping if a panic occurs during oops processing + */ + if (!oops_in_progress) + dump_stack(); #endif /* diff --git a/kernel/pid.c b/kernel/pid.c index fa5f72227e5..ce8e00deacc 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -137,7 +137,9 @@ static int pid_before(int base, int a, int b) } /* - * We might be racing with someone else trying to set pid_ns->last_pid. + * We might be racing with someone else trying to set pid_ns->last_pid + * at the pid allocation time (there's also a sysctl for this, but racing + * with this one is OK, see comment in kernel/pid_namespace.c about it). * We want the winner to have the "later" value, because if the * "earlier" value prevails, then a pid may get reused immediately. * diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index e9c9adc84ca..a8968396046 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -191,9 +191,40 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) return; } +static int pid_ns_ctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * Writing directly to ns' last_pid field is OK, since this field + * is volatile in a living namespace anyway and a code writing to + * it should synchronize its usage with external means. + */ + + tmp.data = ¤t->nsproxy->pid_ns->last_pid; + return proc_dointvec(&tmp, write, buffer, lenp, ppos); +} + +static struct ctl_table pid_ns_ctl_table[] = { + { + .procname = "ns_last_pid", + .maxlen = sizeof(int), + .mode = 0666, /* permissions are checked in the handler */ + .proc_handler = pid_ns_ctl_handler, + }, + { } +}; + +static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); + register_sysctl_paths(kern_path, pid_ns_ctl_table); return 0; } diff --git a/kernel/sys.c b/kernel/sys.c index ddf8155bf3f..40701538fbd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static int prctl_set_mm(int opt, unsigned long addr, + unsigned long arg4, unsigned long arg5) +{ + unsigned long rlim = rlimit(RLIMIT_DATA); + unsigned long vm_req_flags; + unsigned long vm_bad_flags; + struct vm_area_struct *vma; + int error = 0; + struct mm_struct *mm = current->mm; + + if (arg4 | arg5) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (addr >= TASK_SIZE) + return -EINVAL; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, addr); + + if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) { + /* It must be existing VMA */ + if (!vma || vma->vm_start > addr) + goto out; + } + + error = -EINVAL; + switch (opt) { + case PR_SET_MM_START_CODE: + case PR_SET_MM_END_CODE: + vm_req_flags = VM_READ | VM_EXEC; + vm_bad_flags = VM_WRITE | VM_MAYSHARE; + + if ((vma->vm_flags & vm_req_flags) != vm_req_flags || + (vma->vm_flags & vm_bad_flags)) + goto out; + + if (opt == PR_SET_MM_START_CODE) + mm->start_code = addr; + else + mm->end_code = addr; + break; + + case PR_SET_MM_START_DATA: + case PR_SET_MM_END_DATA: + vm_req_flags = VM_READ | VM_WRITE; + vm_bad_flags = VM_EXEC | VM_MAYSHARE; + + if ((vma->vm_flags & vm_req_flags) != vm_req_flags || + (vma->vm_flags & vm_bad_flags)) + goto out; + + if (opt == PR_SET_MM_START_DATA) + mm->start_data = addr; + else + mm->end_data = addr; + break; + + case PR_SET_MM_START_STACK: + +#ifdef CONFIG_STACK_GROWSUP + vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP; +#else + vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN; +#endif + if ((vma->vm_flags & vm_req_flags) != vm_req_flags) + goto out; + + mm->start_stack = addr; + break; + + case PR_SET_MM_START_BRK: + if (addr <= mm->end_data) + goto out; + + if (rlim < RLIM_INFINITY && + (mm->brk - addr) + + (mm->end_data - mm->start_data) > rlim) + goto out; + + mm->start_brk = addr; + break; + + case PR_SET_MM_BRK: + if (addr <= mm->end_data) + goto out; + + if (rlim < RLIM_INFINITY && + (addr - mm->start_brk) + + (mm->end_data - mm->start_data) > rlim) + goto out; + + mm->brk = addr; + break; + + default: + error = -EINVAL; + goto out; + } + + error = 0; + +out: + up_read(&mm->mmap_sem); + + return error; +} +#else /* CONFIG_CHECKPOINT_RESTORE */ +static int prctl_set_mm(int opt, unsigned long addr, + unsigned long arg4, unsigned long arg5) +{ + return -EINVAL; +} +#endif + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; + case PR_SET_MM: + error = prctl_set_mm(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 5a7a2adf4c4..4531294fa62 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -279,7 +279,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, ret = 0; exit_2: if (!input) - free(in_buf); + free(in_buf_save); exit_1: if (!output) free(out_buf); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d9df7454519..dc63d081839 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -48,16 +48,14 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; - struct rcu_head rcu_head; + union { + struct radix_tree_node *parent; /* Used when ascending tree */ + struct rcu_head rcu_head; /* Used when freeing node */ + }; void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -struct radix_tree_path { - struct radix_tree_node *node; - int offset; -}; - #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) @@ -256,6 +254,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) { struct radix_tree_node *node; + struct radix_tree_node *slot; unsigned int height; int tag; @@ -274,18 +273,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) if (!(node = radix_tree_node_alloc(root))) return -ENOMEM; - /* Increase the height. */ - node->slots[0] = indirect_to_ptr(root->rnode); - /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { if (root_tag_get(root, tag)) tag_set(node, tag, 0); } + /* Increase the height. */ newheight = root->height+1; node->height = newheight; node->count = 1; + node->parent = NULL; + slot = root->rnode; + if (newheight > 1) { + slot = indirect_to_ptr(slot); + slot->parent = node; + } + node->slots[0] = slot; node = ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); root->height = newheight; @@ -331,6 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root, if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; slot->height = height; + slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], slot); node->count++; @@ -504,47 +509,41 @@ EXPORT_SYMBOL(radix_tree_tag_set); void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag) { - /* - * The radix tree path needs to be one longer than the maximum path - * since the "list" is null terminated. - */ - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *node = NULL; struct radix_tree_node *slot = NULL; unsigned int height, shift; + int uninitialized_var(offset); height = root->height; if (index > radix_tree_maxindex(height)) goto out; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; + shift = height * RADIX_TREE_MAP_SHIFT; slot = indirect_to_ptr(root->rnode); - while (height > 0) { - int offset; - + while (shift) { if (slot == NULL) goto out; + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp[1].offset = offset; - pathp[1].node = slot; + node = slot; slot = slot->slots[offset]; - pathp++; - shift -= RADIX_TREE_MAP_SHIFT; - height--; } if (slot == NULL) goto out; - while (pathp->node) { - if (!tag_get(pathp->node, tag, pathp->offset)) + while (node) { + if (!tag_get(node, tag, offset)) goto out; - tag_clear(pathp->node, tag, pathp->offset); - if (any_tag_set(pathp->node, tag)) + tag_clear(node, tag, offset); + if (any_tag_set(node, tag)) goto out; - pathp--; + + index >>= RADIX_TREE_MAP_SHIFT; + offset = index & RADIX_TREE_MAP_MASK; + node = node->parent; } /* clear the root's tag bit */ @@ -646,8 +645,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned int iftag, unsigned int settag) { unsigned int height = root->height; - struct radix_tree_path path[height]; - struct radix_tree_path *pathp = path; + struct radix_tree_node *node = NULL; struct radix_tree_node *slot; unsigned int shift; unsigned long tagged = 0; @@ -671,14 +669,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift = (height - 1) * RADIX_TREE_MAP_SHIFT; slot = indirect_to_ptr(root->rnode); - /* - * we fill the path from (root->height - 2) to 0, leaving the index at - * (root->height - 1) as a terminator. Zero the node in the terminator - * so that we can use this to end walk loops back up the path. - */ - path[height - 1].node = NULL; - for (;;) { + unsigned long upindex; int offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -686,12 +678,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; if (!tag_get(slot, iftag, offset)) goto next; - if (height > 1) { + if (shift) { /* Go down one level */ - height--; shift -= RADIX_TREE_MAP_SHIFT; - path[height - 1].node = slot; - path[height - 1].offset = offset; + node = slot; slot = slot->slots[offset]; continue; } @@ -701,15 +691,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ - pathp = &path[0]; - while (pathp->node) { + upindex = index; + while (node) { + upindex >>= RADIX_TREE_MAP_SHIFT; + offset = upindex & RADIX_TREE_MAP_MASK; + /* stop if we find a node with the tag already set */ - if (tag_get(pathp->node, settag, pathp->offset)) + if (tag_get(node, settag, offset)) break; - tag_set(pathp->node, settag, pathp->offset); - pathp++; + tag_set(node, settag, offset); + node = node->parent; } + /* + * Small optimization: now clear that node pointer. + * Since all of this slot's ancestors now have the tag set + * from setting it above, we have no further need to walk + * back up the tree setting tags, until we update slot to + * point to another radix_tree_node. + */ + node = NULL; + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; @@ -724,8 +726,7 @@ next: * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. */ - slot = path[height - 1].node; - height++; + slot = slot->parent; shift += RADIX_TREE_MAP_SHIFT; } } @@ -1299,7 +1300,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* try to shrink tree height */ while (root->height > 0) { struct radix_tree_node *to_free = root->rnode; - void *newptr; + struct radix_tree_node *slot; BUG_ON(!radix_tree_is_indirect_ptr(to_free)); to_free = indirect_to_ptr(to_free); @@ -1320,10 +1321,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - newptr = to_free->slots[0]; - if (root->height > 1) - newptr = ptr_to_indirect(newptr); - root->rnode = newptr; + slot = to_free->slots[0]; + if (root->height > 1) { + slot->parent = NULL; + slot = ptr_to_indirect(slot); + } + root->rnode = slot; root->height--; /* @@ -1363,16 +1366,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) */ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { - /* - * The radix tree path needs to be one longer than the maximum path - * since the "list" is null terminated. - */ - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *node = NULL; struct radix_tree_node *slot = NULL; struct radix_tree_node *to_free; unsigned int height, shift; int tag; - int offset; + int uninitialized_var(offset); height = root->height; if (index > radix_tree_maxindex(height)) @@ -1385,39 +1384,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) goto out; } slot = indirect_to_ptr(slot); - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; + shift = height * RADIX_TREE_MAP_SHIFT; do { if (slot == NULL) goto out; - pathp++; + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; + node = slot; slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); + } while (shift); if (slot == NULL) goto out; /* - * Clear all tags associated with the just-deleted item + * Clear all tags associated with the item to be deleted. + * This way of doing it would be inefficient, but seldom is any set. */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (tag_get(pathp->node, tag, pathp->offset)) + if (tag_get(node, tag, offset)) radix_tree_tag_clear(root, index, tag); } to_free = NULL; /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; + while (node) { + node->slots[offset] = NULL; + node->count--; /* * Queue the node for deferred freeing after the * last reference to it disappears (set NULL, above). @@ -1425,17 +1420,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) if (to_free) radix_tree_node_free(to_free); - if (pathp->node->count) { - if (pathp->node == indirect_to_ptr(root->rnode)) + if (node->count) { + if (node == indirect_to_ptr(root->rnode)) radix_tree_shrink(root); goto out; } /* Node with zero slots in use so free it */ - to_free = pathp->node; - pathp--; + to_free = node; + index >>= RADIX_TREE_MAP_SHIFT; + offset = index & RADIX_TREE_MAP_MASK; + node = node->parent; } + root_tag_clear_all(root); root->height = 0; root->rnode = NULL; diff --git a/mm/compaction.c b/mm/compaction.c index e6670c34eb4..71a58f67f48 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -350,7 +350,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } if (!cc->sync) - mode |= ISOLATE_CLEAN; + mode |= ISOLATE_ASYNC_MIGRATE; /* Try isolate the page */ if (__isolate_lru_page(page, mode, 0) != 0) @@ -557,7 +557,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, (unsigned long)cc, false, - cc->sync); + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -671,6 +671,7 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, + .sync = true, }; zone = &pgdat->node_zones[zoneid]; diff --git a/mm/filemap.c b/mm/filemap.c index c4ee2e918be..97f49ed35bd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -393,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) { int error; - struct mem_cgroup *memcg = NULL; VM_BUG_ON(!PageLocked(old)); VM_BUG_ON(!PageLocked(new)); VM_BUG_ON(new->mapping); - /* - * This is not page migration, but prepare_migration and - * end_migration does enough work for charge replacement. - * - * In the longer term we probably want a specialized function - * for moving the charge from old to new in a more efficient - * manner. - */ - error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask); - if (error) - return error; - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (!error) { struct address_space *mapping = old->mapping; @@ -432,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); + /* mem_cgroup codes must not be called under tree_lock */ + mem_cgroup_replace_page_cache(old, new); radix_tree_preload_end(); if (freepage) freepage(old); page_cache_release(old); - mem_cgroup_end_migration(memcg, old, new, true); - } else { - mem_cgroup_end_migration(memcg, old, new, false); } return error; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 36b3d988b4e..b3ffc21ce80 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -487,41 +487,68 @@ static struct attribute_group khugepaged_attr_group = { .attrs = khugepaged_attr, .name = "khugepaged", }; -#endif /* CONFIG_SYSFS */ -static int __init hugepage_init(void) +static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) { int err; -#ifdef CONFIG_SYSFS - static struct kobject *hugepage_kobj; -#endif - - err = -EINVAL; - if (!has_transparent_hugepage()) { - transparent_hugepage_flags = 0; - goto out; - } -#ifdef CONFIG_SYSFS - err = -ENOMEM; - hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); - if (unlikely(!hugepage_kobj)) { + *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); + if (unlikely(!*hugepage_kobj)) { printk(KERN_ERR "hugepage: failed kobject create\n"); - goto out; + return -ENOMEM; } - err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group); + err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group); if (err) { printk(KERN_ERR "hugepage: failed register hugeage group\n"); - goto out; + goto delete_obj; } - err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group); + err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group); if (err) { printk(KERN_ERR "hugepage: failed register hugeage group\n"); - goto out; + goto remove_hp_group; } -#endif + + return 0; + +remove_hp_group: + sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group); +delete_obj: + kobject_put(*hugepage_kobj); + return err; +} + +static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj) +{ + sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group); + sysfs_remove_group(hugepage_kobj, &hugepage_attr_group); + kobject_put(hugepage_kobj); +} +#else +static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj) +{ + return 0; +} + +static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj) +{ +} +#endif /* CONFIG_SYSFS */ + +static int __init hugepage_init(void) +{ + int err; + struct kobject *hugepage_kobj; + + if (!has_transparent_hugepage()) { + transparent_hugepage_flags = 0; + return -EINVAL; + } + + err = hugepage_init_sysfs(&hugepage_kobj); + if (err) + return err; err = khugepaged_slab_init(); if (err) @@ -545,7 +572,9 @@ static int __init hugepage_init(void) set_recommended_min_free_kbytes(); + return 0; out: + hugepage_exit_sysfs(hugepage_kobj); return err; } module_init(hugepage_init) @@ -997,7 +1026,7 @@ out: } int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - pmd_t *pmd) + pmd_t *pmd, unsigned long addr) { int ret = 0; @@ -1013,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pgtable = get_pmd_huge_pte(tlb->mm); page = pmd_page(*pmd); pmd_clear(pmd); + tlb_remove_pmd_tlb_entry(tlb, pmd, addr); page_remove_rmap(page); VM_BUG_ON(page_mapcount(page) < 0); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); @@ -1116,7 +1146,6 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, entry = pmd_modify(entry, newprot); set_pmd_at(mm, addr, pmd, entry); spin_unlock(&vma->vm_mm->page_table_lock); - flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE); ret = 1; } } else @@ -1199,16 +1228,16 @@ static int __split_huge_page_splitting(struct page *page, static void __split_huge_page_refcount(struct page *page) { int i; - unsigned long head_index = page->index; struct zone *zone = page_zone(page); - int zonestat; int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); compound_lock(page); + /* complete memcg works before add pages to LRU */ + mem_cgroup_split_huge_fixup(page); - for (i = 1; i < HPAGE_PMD_NR; i++) { + for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { struct page *page_tail = page + i; /* tail_page->_mapcount cannot change */ @@ -1271,14 +1300,13 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(page_tail->mapping); page_tail->mapping = page->mapping; - page_tail->index = ++head_index; + page_tail->index = page->index + i; BUG_ON(!PageAnon(page_tail)); BUG_ON(!PageUptodate(page_tail)); BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); - mem_cgroup_split_huge_fixup(page, page_tail); lru_add_page_tail(zone, page, page_tail); } @@ -1288,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); - /* - * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics, - * so adjust those appropriately if this page is on the LRU. - */ - if (PageLRU(page)) { - zonestat = NR_LRU_BASE + page_lru(page); - __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1)); - } - ClearPageCompound(page); compound_unlock(page); spin_unlock_irq(&zone->lru_lock); @@ -28,6 +28,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/slab.h> +#include <linux/memcontrol.h> #include <linux/rbtree.h> #include <linux/memory.h> #include <linux/mmu_notifier.h> @@ -1571,6 +1572,16 @@ struct page *ksm_does_need_to_copy(struct page *page, new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { + /* + * The memcg-specific accounting when moving + * pages around the LRU lists relies on the + * page's owner (memcg) to be valid. Usually, + * pages are assigned to a new owner before + * being put on the LRU list, but since this + * is not the case here, the stale owner from + * a previous allocation cycle must be reset. + */ + mem_cgroup_reset_owner(new_page); copy_user_highpage(new_page, page, address, vma); SetPageDirty(new_page); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d87aa3510c5..602207be985 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -123,16 +123,22 @@ struct mem_cgroup_stat_cpu { unsigned long targets[MEM_CGROUP_NTARGETS]; }; +struct mem_cgroup_reclaim_iter { + /* css_id of the last scanned hierarchy member */ + int position; + /* scan generation, increased every round-trip */ + unsigned int generation; +}; + /* * per-zone information in memory controller. */ struct mem_cgroup_per_zone { - /* - * spin_lock to protect the per cgroup LRU - */ - struct list_head lists[NR_LRU_LISTS]; + struct lruvec lruvec; unsigned long count[NR_LRU_LISTS]; + struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; + struct zone_reclaim_stat reclaim_stat; struct rb_node tree_node; /* RB tree node */ unsigned long long usage_in_excess;/* Set to the value by which */ @@ -233,11 +239,6 @@ struct mem_cgroup { * per zone LRU lists. */ struct mem_cgroup_lru_info info; - /* - * While reclaiming in a hierarchy, we cache the last child we - * reclaimed from. - */ - int last_scanned_child; int last_scanned_node; #if MAX_NUMNODES > 1 nodemask_t scan_nodes; @@ -366,8 +367,6 @@ enum charge_type { #define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT) #define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1 #define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT) -#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 -#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) static void mem_cgroup_get(struct mem_cgroup *memcg); static void mem_cgroup_put(struct mem_cgroup *memcg); @@ -566,7 +565,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) struct mem_cgroup_per_zone *mz; struct mem_cgroup_tree_per_zone *mctz; - for_each_node_state(node, N_POSSIBLE) { + for_each_node(node) { for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = mem_cgroup_zoneinfo(memcg, node, zone); mctz = soft_limit_tree_node_zone(node, zone); @@ -656,16 +655,6 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); } -void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val) -{ - this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); -} - -void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val) -{ - this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); -} - static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) { @@ -749,37 +738,32 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, return total; } -static bool __memcg_event_check(struct mem_cgroup *memcg, int target) +static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, + enum mem_cgroup_events_target target) { unsigned long val, next; val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); next = __this_cpu_read(memcg->stat->targets[target]); /* from time_after() in jiffies.h */ - return ((long)next - (long)val < 0); -} - -static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target) -{ - unsigned long val, next; - - val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); - - switch (target) { - case MEM_CGROUP_TARGET_THRESH: - next = val + THRESHOLDS_EVENTS_TARGET; - break; - case MEM_CGROUP_TARGET_SOFTLIMIT: - next = val + SOFTLIMIT_EVENTS_TARGET; - break; - case MEM_CGROUP_TARGET_NUMAINFO: - next = val + NUMAINFO_EVENTS_TARGET; - break; - default: - return; + if ((long)next - (long)val < 0) { + switch (target) { + case MEM_CGROUP_TARGET_THRESH: + next = val + THRESHOLDS_EVENTS_TARGET; + break; + case MEM_CGROUP_TARGET_SOFTLIMIT: + next = val + SOFTLIMIT_EVENTS_TARGET; + break; + case MEM_CGROUP_TARGET_NUMAINFO: + next = val + NUMAINFO_EVENTS_TARGET; + break; + default: + break; + } + __this_cpu_write(memcg->stat->targets[target], next); + return true; } - - __this_cpu_write(memcg->stat->targets[target], next); + return false; } /* @@ -790,25 +774,27 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { preempt_disable(); /* threshold event is triggered in finer grain than soft limit */ - if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) { + if (unlikely(mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_THRESH))) { + bool do_softlimit, do_numainfo; + + do_softlimit = mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_SOFTLIMIT); +#if MAX_NUMNODES > 1 + do_numainfo = mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_NUMAINFO); +#endif + preempt_enable(); + mem_cgroup_threshold(memcg); - __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH); - if (unlikely(__memcg_event_check(memcg, - MEM_CGROUP_TARGET_SOFTLIMIT))) { + if (unlikely(do_softlimit)) mem_cgroup_update_tree(memcg, page); - __mem_cgroup_target_update(memcg, - MEM_CGROUP_TARGET_SOFTLIMIT); - } #if MAX_NUMNODES > 1 - if (unlikely(__memcg_event_check(memcg, - MEM_CGROUP_TARGET_NUMAINFO))) { + if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); - __mem_cgroup_target_update(memcg, - MEM_CGROUP_TARGET_NUMAINFO); - } #endif - } - preempt_enable(); + } else + preempt_enable(); } struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) @@ -853,83 +839,116 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return memcg; } -/* The caller has to guarantee "mem" exists before calling this */ -static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg) +/** + * mem_cgroup_iter - iterate over memory cgroup hierarchy + * @root: hierarchy root + * @prev: previously returned memcg, NULL on first invocation + * @reclaim: cookie for shared reclaim walks, NULL for full walks + * + * Returns references to children of the hierarchy below @root, or + * @root itself, or %NULL after a full round-trip. + * + * Caller must pass the return value in @prev on subsequent + * invocations for reference counting, or use mem_cgroup_iter_break() + * to cancel a hierarchy walk before the round-trip is complete. + * + * Reclaimers can specify a zone and a priority level in @reclaim to + * divide up the memcgs in the hierarchy among all concurrent + * reclaimers operating on the same zone and priority. + */ +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, + struct mem_cgroup *prev, + struct mem_cgroup_reclaim_cookie *reclaim) { - struct cgroup_subsys_state *css; - int found; + struct mem_cgroup *memcg = NULL; + int id = 0; - if (!memcg) /* ROOT cgroup has the smallest ID */ - return root_mem_cgroup; /*css_put/get against root is ignored*/ - if (!memcg->use_hierarchy) { - if (css_tryget(&memcg->css)) - return memcg; + if (mem_cgroup_disabled()) return NULL; - } - rcu_read_lock(); - /* - * searching a memory cgroup which has the smallest ID under given - * ROOT cgroup. (ID >= 1) - */ - css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found); - if (css && css_tryget(css)) - memcg = container_of(css, struct mem_cgroup, css); - else - memcg = NULL; - rcu_read_unlock(); - return memcg; -} -static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, - struct mem_cgroup *root, - bool cond) -{ - int nextid = css_id(&iter->css) + 1; - int found; - int hierarchy_used; - struct cgroup_subsys_state *css; + if (!root) + root = root_mem_cgroup; - hierarchy_used = iter->use_hierarchy; + if (prev && !reclaim) + id = css_id(&prev->css); - css_put(&iter->css); - /* If no ROOT, walk all, ignore hierarchy */ - if (!cond || (root && !hierarchy_used)) - return NULL; + if (prev && prev != root) + css_put(&prev->css); - if (!root) - root = root_mem_cgroup; + if (!root->use_hierarchy && root != root_mem_cgroup) { + if (prev) + return NULL; + return root; + } - do { - iter = NULL; - rcu_read_lock(); + while (!memcg) { + struct mem_cgroup_reclaim_iter *uninitialized_var(iter); + struct cgroup_subsys_state *css; + + if (reclaim) { + int nid = zone_to_nid(reclaim->zone); + int zid = zone_idx(reclaim->zone); + struct mem_cgroup_per_zone *mz; - css = css_get_next(&mem_cgroup_subsys, nextid, - &root->css, &found); - if (css && css_tryget(css)) - iter = container_of(css, struct mem_cgroup, css); + mz = mem_cgroup_zoneinfo(root, nid, zid); + iter = &mz->reclaim_iter[reclaim->priority]; + if (prev && reclaim->generation != iter->generation) + return NULL; + id = iter->position; + } + + rcu_read_lock(); + css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id); + if (css) { + if (css == &root->css || css_tryget(css)) + memcg = container_of(css, + struct mem_cgroup, css); + } else + id = 0; rcu_read_unlock(); - /* If css is NULL, no more cgroups will be found */ - nextid = found + 1; - } while (css && !iter); - return iter; + if (reclaim) { + iter->position = id; + if (!css) + iter->generation++; + else if (!prev && memcg) + reclaim->generation = iter->generation; + } + + if (prev && !css) + return NULL; + } + return memcg; } -/* - * for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please - * be careful that "break" loop is not allowed. We have reference count. - * Instead of that modify "cond" to be false and "continue" to exit the loop. - */ -#define for_each_mem_cgroup_tree_cond(iter, root, cond) \ - for (iter = mem_cgroup_start_loop(root);\ - iter != NULL;\ - iter = mem_cgroup_get_next(iter, root, cond)) -#define for_each_mem_cgroup_tree(iter, root) \ - for_each_mem_cgroup_tree_cond(iter, root, true) +/** + * mem_cgroup_iter_break - abort a hierarchy walk prematurely + * @root: hierarchy root + * @prev: last visited hierarchy member as returned by mem_cgroup_iter() + */ +void mem_cgroup_iter_break(struct mem_cgroup *root, + struct mem_cgroup *prev) +{ + if (!root) + root = root_mem_cgroup; + if (prev && prev != root) + css_put(&prev->css); +} -#define for_each_mem_cgroup_all(iter) \ - for_each_mem_cgroup_tree_cond(iter, NULL, true) +/* + * Iteration constructs for visiting all cgroups (under a tree). If + * loops are exited prematurely (break), mem_cgroup_iter_break() must + * be used for reference counting. + */ +#define for_each_mem_cgroup_tree(iter, root) \ + for (iter = mem_cgroup_iter(root, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(root, iter, NULL)) +#define for_each_mem_cgroup(iter) \ + for (iter = mem_cgroup_iter(NULL, NULL, NULL); \ + iter != NULL; \ + iter = mem_cgroup_iter(NULL, iter, NULL)) static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { @@ -949,11 +968,11 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) goto out; switch (idx) { - case PGMAJFAULT: - mem_cgroup_pgmajfault(memcg, 1); - break; case PGFAULT: - mem_cgroup_pgfault(memcg, 1); + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); + break; + case PGMAJFAULT: + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); break; default: BUG(); @@ -963,6 +982,27 @@ out: } EXPORT_SYMBOL(mem_cgroup_count_vm_event); +/** + * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg + * @zone: zone of the wanted lruvec + * @mem: memcg of the wanted lruvec + * + * Returns the lru list vector holding pages for the given @zone and + * @mem. This can be the global zone lruvec, if the memory controller + * is disabled. + */ +struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, + struct mem_cgroup *memcg) +{ + struct mem_cgroup_per_zone *mz; + + if (mem_cgroup_disabled()) + return &zone->lruvec; + + mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone)); + return &mz->lruvec; +} + /* * Following LRU functions are allowed to be used without PCG_LOCK. * Operations are called by routine of global LRU independently from memcg. @@ -977,180 +1017,91 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event); * When moving account, the page is not on LRU. It's isolated. */ -void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) -{ - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - - if (mem_cgroup_disabled()) - return; - pc = lookup_page_cgroup(page); - /* can happen while we handle swapcache. */ - if (!TestClearPageCgroupAcctLRU(pc)) - return; - VM_BUG_ON(!pc->mem_cgroup); - /* - * We don't check PCG_USED bit. It's cleared when the "page" is finally - * removed from global LRU. - */ - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - /* huge page split is done under lru_lock. so, we have no races. */ - MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); - if (mem_cgroup_is_root(pc->mem_cgroup)) - return; - VM_BUG_ON(list_empty(&pc->lru)); - list_del_init(&pc->lru); -} - -void mem_cgroup_del_lru(struct page *page) -{ - mem_cgroup_del_lru_list(page, page_lru(page)); -} - -/* - * Writeback is about to end against a page which has been marked for immediate - * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. +/** + * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec + * @zone: zone of the page + * @page: the page + * @lru: current lru + * + * This function accounts for @page being added to @lru, and returns + * the lruvec for the given @zone and the memcg @page is charged to. + * + * The callsite is then responsible for physically linking the page to + * the returned lruvec->lists[@lru]. */ -void mem_cgroup_rotate_reclaimable_page(struct page *page) +struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, + enum lru_list lru) { struct mem_cgroup_per_zone *mz; + struct mem_cgroup *memcg; struct page_cgroup *pc; - enum lru_list lru = page_lru(page); if (mem_cgroup_disabled()) - return; + return &zone->lruvec; pc = lookup_page_cgroup(page); - /* unused or root page is not rotated. */ - if (!PageCgroupUsed(pc)) - return; - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - if (mem_cgroup_is_root(pc->mem_cgroup)) - return; - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - list_move_tail(&pc->lru, &mz->lists[lru]); + memcg = pc->mem_cgroup; + mz = page_cgroup_zoneinfo(memcg, page); + /* compound_order() is stabilized through lru_lock */ + MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); + return &mz->lruvec; } -void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) +/** + * mem_cgroup_lru_del_list - account for removing an lru page + * @page: the page + * @lru: target lru + * + * This function accounts for @page being removed from @lru. + * + * The callsite is then responsible for physically unlinking + * @page->lru. + */ +void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) { struct mem_cgroup_per_zone *mz; + struct mem_cgroup *memcg; struct page_cgroup *pc; if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); - /* unused or root page is not rotated. */ - if (!PageCgroupUsed(pc)) - return; - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - if (mem_cgroup_is_root(pc->mem_cgroup)) - return; - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - list_move(&pc->lru, &mz->lists[lru]); -} - -void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) -{ - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - - if (mem_cgroup_disabled()) - return; - pc = lookup_page_cgroup(page); - VM_BUG_ON(PageCgroupAcctLRU(pc)); - /* - * putback: charge: - * SetPageLRU SetPageCgroupUsed - * smp_mb smp_mb - * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU - * - * Ensure that one of the two sides adds the page to the memcg - * LRU during a race. - */ - smp_mb(); - if (!PageCgroupUsed(pc)) - return; - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); + memcg = pc->mem_cgroup; + VM_BUG_ON(!memcg); + mz = page_cgroup_zoneinfo(memcg, page); /* huge page split is done under lru_lock. so, we have no races. */ - MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); - SetPageCgroupAcctLRU(pc); - if (mem_cgroup_is_root(pc->mem_cgroup)) - return; - list_add(&pc->lru, &mz->lists[lru]); -} - -/* - * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed - * while it's linked to lru because the page may be reused after it's fully - * uncharged. To handle that, unlink page_cgroup from LRU when charge it again. - * It's done under lock_page and expected that zone->lru_lock isnever held. - */ -static void mem_cgroup_lru_del_before_commit(struct page *page) -{ - unsigned long flags; - struct zone *zone = page_zone(page); - struct page_cgroup *pc = lookup_page_cgroup(page); - - /* - * Doing this check without taking ->lru_lock seems wrong but this - * is safe. Because if page_cgroup's USED bit is unset, the page - * will not be added to any memcg's LRU. If page_cgroup's USED bit is - * set, the commit after this will fail, anyway. - * This all charge/uncharge is done under some mutual execustion. - * So, we don't need to taking care of changes in USED bit. - */ - if (likely(!PageLRU(page))) - return; - - spin_lock_irqsave(&zone->lru_lock, flags); - /* - * Forget old LRU when this page_cgroup is *not* used. This Used bit - * is guarded by lock_page() because the page is SwapCache. - */ - if (!PageCgroupUsed(pc)) - mem_cgroup_del_lru_list(page, page_lru(page)); - spin_unlock_irqrestore(&zone->lru_lock, flags); + VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page))); + MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); } -static void mem_cgroup_lru_add_after_commit(struct page *page) +void mem_cgroup_lru_del(struct page *page) { - unsigned long flags; - struct zone *zone = page_zone(page); - struct page_cgroup *pc = lookup_page_cgroup(page); - /* - * putback: charge: - * SetPageLRU SetPageCgroupUsed - * smp_mb smp_mb - * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU - * - * Ensure that one of the two sides adds the page to the memcg - * LRU during a race. - */ - smp_mb(); - /* taking care of that the page is added to LRU while we commit it */ - if (likely(!PageLRU(page))) - return; - spin_lock_irqsave(&zone->lru_lock, flags); - /* link when the page is linked to LRU but page_cgroup isn't */ - if (PageLRU(page) && !PageCgroupAcctLRU(pc)) - mem_cgroup_add_lru_list(page, page_lru(page)); - spin_unlock_irqrestore(&zone->lru_lock, flags); + mem_cgroup_lru_del_list(page, page_lru(page)); } - -void mem_cgroup_move_lists(struct page *page, - enum lru_list from, enum lru_list to) +/** + * mem_cgroup_lru_move_lists - account for moving a page between lrus + * @zone: zone of the page + * @page: the page + * @from: current lru + * @to: target lru + * + * This function accounts for @page being moved between the lrus @from + * and @to, and returns the lruvec for the given @zone and the memcg + * @page is charged to. + * + * The callsite is then responsible for physically relinking + * @page->lru to the returned lruvec->lists[@to]. + */ +struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, + struct page *page, + enum lru_list from, + enum lru_list to) { - if (mem_cgroup_disabled()) - return; - mem_cgroup_del_lru_list(page, from); - mem_cgroup_add_lru_list(page, to); + /* XXX: Optimize this, especially for @from == @to */ + mem_cgroup_lru_del_list(page, from); + return mem_cgroup_lru_add_list(zone, page, to); } /* @@ -1175,10 +1126,21 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) struct task_struct *p; p = find_lock_task_mm(task); - if (!p) - return 0; - curr = try_get_mem_cgroup_from_mm(p->mm); - task_unlock(p); + if (p) { + curr = try_get_mem_cgroup_from_mm(p->mm); + task_unlock(p); + } else { + /* + * All threads may have already detached their mm's, but the oom + * killer still needs to detect if they have already been oom + * killed to prevent needlessly killing additional tasks. + */ + task_lock(task); + curr = mem_cgroup_from_task(task); + if (curr) + css_get(&curr->css); + task_unlock(task); + } if (!curr) return 0; /* @@ -1258,68 +1220,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) return &mz->reclaim_stat; } -unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, - struct list_head *dst, - unsigned long *scanned, int order, - isolate_mode_t mode, - struct zone *z, - struct mem_cgroup *mem_cont, - int active, int file) -{ - unsigned long nr_taken = 0; - struct page *page; - unsigned long scan; - LIST_HEAD(pc_list); - struct list_head *src; - struct page_cgroup *pc, *tmp; - int nid = zone_to_nid(z); - int zid = zone_idx(z); - struct mem_cgroup_per_zone *mz; - int lru = LRU_FILE * file + active; - int ret; - - BUG_ON(!mem_cont); - mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); - src = &mz->lists[lru]; - - scan = 0; - list_for_each_entry_safe_reverse(pc, tmp, src, lru) { - if (scan >= nr_to_scan) - break; - - if (unlikely(!PageCgroupUsed(pc))) - continue; - - page = lookup_cgroup_page(pc); - - if (unlikely(!PageLRU(page))) - continue; - - scan++; - ret = __isolate_lru_page(page, mode, file); - switch (ret) { - case 0: - list_move(&page->lru, dst); - mem_cgroup_del_lru(page); - nr_taken += hpage_nr_pages(page); - break; - case -EBUSY: - /* we don't affect global LRU but rotate in our LRU */ - mem_cgroup_rotate_lru_list(page, page_lru(page)); - break; - default: - break; - } - } - - *scanned = scan; - - trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken, - 0, 0, 0, mode); - - return nr_taken; -} - #define mem_cgroup_from_res_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) @@ -1536,41 +1436,40 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) return min(limit, memsw); } -/* - * Visit the first child (need not be the first child as per the ordering - * of the cgroup list, since we track last_scanned_child) of @mem and use - * that to reclaim free pages from. - */ -static struct mem_cgroup * -mem_cgroup_select_victim(struct mem_cgroup *root_memcg) +static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, + gfp_t gfp_mask, + unsigned long flags) { - struct mem_cgroup *ret = NULL; - struct cgroup_subsys_state *css; - int nextid, found; - - if (!root_memcg->use_hierarchy) { - css_get(&root_memcg->css); - ret = root_memcg; - } + unsigned long total = 0; + bool noswap = false; + int loop; - while (!ret) { - rcu_read_lock(); - nextid = root_memcg->last_scanned_child + 1; - css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css, - &found); - if (css && css_tryget(css)) - ret = container_of(css, struct mem_cgroup, css); + if (flags & MEM_CGROUP_RECLAIM_NOSWAP) + noswap = true; + if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum) + noswap = true; - rcu_read_unlock(); - /* Updates scanning parameter */ - if (!css) { - /* this means start scan from ID:1 */ - root_memcg->last_scanned_child = 0; - } else - root_memcg->last_scanned_child = found; + for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) { + if (loop) + drain_all_stock_async(memcg); + total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap); + /* + * Allow limit shrinkers, which are triggered directly + * by userspace, to catch signals and stop reclaim + * after minimal progress, regardless of the margin. + */ + if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK)) + break; + if (mem_cgroup_margin(memcg)) + break; + /* + * If nothing was reclaimed after two attempts, there + * may be no reclaimable pages in this hierarchy. + */ + if (loop && !total) + break; } - - return ret; + return total; } /** @@ -1710,61 +1609,35 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) } #endif -/* - * Scan the hierarchy if needed to reclaim memory. We remember the last child - * we reclaimed from, so that we don't end up penalizing one child extensively - * based on its position in the children list. - * - * root_memcg is the original ancestor that we've been reclaim from. - * - * We give up and return to the caller when we visit root_memcg twice. - * (other groups can be removed while we're walking....) - * - * If shrink==true, for avoiding to free too much, this returns immedieately. - */ -static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg, - struct zone *zone, - gfp_t gfp_mask, - unsigned long reclaim_options, - unsigned long *total_scanned) -{ - struct mem_cgroup *victim; - int ret, total = 0; +static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, + struct zone *zone, + gfp_t gfp_mask, + unsigned long *total_scanned) +{ + struct mem_cgroup *victim = NULL; + int total = 0; int loop = 0; - bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; - bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; - bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; unsigned long excess; unsigned long nr_scanned; + struct mem_cgroup_reclaim_cookie reclaim = { + .zone = zone, + .priority = 0, + }; excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; - /* If memsw_is_minimum==1, swap-out is of-no-use. */ - if (!check_soft && !shrink && root_memcg->memsw_is_minimum) - noswap = true; - while (1) { - victim = mem_cgroup_select_victim(root_memcg); - if (victim == root_memcg) { + victim = mem_cgroup_iter(root_memcg, victim, &reclaim); + if (!victim) { loop++; - /* - * We are not draining per cpu cached charges during - * soft limit reclaim because global reclaim doesn't - * care about charges. It tries to free some memory and - * charges will not give any. - */ - if (!check_soft && loop >= 1) - drain_all_stock_async(root_memcg); if (loop >= 2) { /* * If we have not been able to reclaim * anything, it might because there are * no reclaimable pages under this hierarchy */ - if (!check_soft || !total) { - css_put(&victim->css); + if (!total) break; - } /* * We want to do more targeted reclaim. * excess >> 2 is not to excessive so as to @@ -1772,40 +1645,20 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg, * coming back to reclaim from this cgroup */ if (total >= (excess >> 2) || - (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) { - css_put(&victim->css); + (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) break; - } } - } - if (!mem_cgroup_reclaimable(victim, noswap)) { - /* this cgroup's local usage == 0 */ - css_put(&victim->css); continue; } - /* we use swappiness of local cgroup */ - if (check_soft) { - ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, - noswap, zone, &nr_scanned); - *total_scanned += nr_scanned; - } else - ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, - noswap); - css_put(&victim->css); - /* - * At shrinking usage, we can't check we should stop here or - * reclaim more. It's depends on callers. last_scanned_child - * will work enough for keeping fairness under tree. - */ - if (shrink) - return ret; - total += ret; - if (check_soft) { - if (!res_counter_soft_limit_excess(&root_memcg->res)) - return total; - } else if (mem_cgroup_margin(root_memcg)) - return total; + if (!mem_cgroup_reclaimable(victim, false)) + continue; + total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, + zone, &nr_scanned); + *total_scanned += nr_scanned; + if (!res_counter_soft_limit_excess(&root_memcg->res)) + break; } + mem_cgroup_iter_break(root_memcg, victim); return total; } @@ -1817,16 +1670,16 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg, static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) { struct mem_cgroup *iter, *failed = NULL; - bool cond = true; - for_each_mem_cgroup_tree_cond(iter, memcg, cond) { + for_each_mem_cgroup_tree(iter, memcg) { if (iter->oom_lock) { /* * this subtree of our hierarchy is already locked * so we cannot give a lock. */ failed = iter; - cond = false; + mem_cgroup_iter_break(memcg, iter); + break; } else iter->oom_lock = true; } @@ -1838,11 +1691,10 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) * OK, we failed to lock the whole subtree so we have to clean up * what we set up to the failing subtree */ - cond = true; - for_each_mem_cgroup_tree_cond(iter, memcg, cond) { + for_each_mem_cgroup_tree(iter, memcg) { if (iter == failed) { - cond = false; - continue; + mem_cgroup_iter_break(memcg, iter); + break; } iter->oom_lock = false; } @@ -2007,7 +1859,7 @@ void mem_cgroup_update_page_stat(struct page *page, bool need_unlock = false; unsigned long uninitialized_var(flags); - if (unlikely(!pc)) + if (mem_cgroup_disabled()) return; rcu_read_lock(); @@ -2238,7 +2090,7 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, struct mem_cgroup *iter; if ((action == CPU_ONLINE)) { - for_each_mem_cgroup_all(iter) + for_each_mem_cgroup(iter) synchronize_mem_cgroup_on_move(iter, cpu); return NOTIFY_OK; } @@ -2246,7 +2098,7 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN) return NOTIFY_OK; - for_each_mem_cgroup_all(iter) + for_each_mem_cgroup(iter) mem_cgroup_drain_pcp_counter(iter, cpu); stock = &per_cpu(memcg_stock, cpu); @@ -2300,8 +2152,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (!(gfp_mask & __GFP_WAIT)) return CHARGE_WOULDBLOCK; - ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, - gfp_mask, flags, NULL); + ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags); if (mem_cgroup_margin(mem_over_limit) >= nr_pages) return CHARGE_RETRY; /* @@ -2334,8 +2185,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, } /* - * Unlike exported interface, "oom" parameter is added. if oom==true, - * oom-killer can be invoked. + * __mem_cgroup_try_charge() does + * 1. detect memcg to be charged against from passed *mm and *ptr, + * 2. update res_counter + * 3. call memory reclaim if necessary. + * + * In some special case, if the task is fatal, fatal_signal_pending() or + * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup + * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon + * as possible without any hazards. 2: all pages should have a valid + * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg + * pointer, that is treated as a charge to root_mem_cgroup. + * + * So __mem_cgroup_try_charge() will return + * 0 ... on success, filling *ptr with a valid memcg pointer. + * -ENOMEM ... charge failure because of resource limits. + * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup. + * + * Unlike the exported interface, an "oom" parameter is added. if oom==true, + * the oom-killer can be invoked. */ static int __mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, @@ -2364,7 +2232,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, * set, if so charge the init_mm (happens for pagecache usage). */ if (!*ptr && !mm) - goto bypass; + *ptr = root_mem_cgroup; again: if (*ptr) { /* css should be a valid one */ memcg = *ptr; @@ -2390,7 +2258,9 @@ again: * task-struct. So, mm->owner can be NULL. */ memcg = mem_cgroup_from_task(p); - if (!memcg || mem_cgroup_is_root(memcg)) { + if (!memcg) + memcg = root_mem_cgroup; + if (mem_cgroup_is_root(memcg)) { rcu_read_unlock(); goto done; } @@ -2465,8 +2335,8 @@ nomem: *ptr = NULL; return -ENOMEM; bypass: - *ptr = NULL; - return 0; + *ptr = root_mem_cgroup; + return -EINTR; } /* @@ -2522,7 +2392,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); - id = lookup_swap_cgroup(ent); + id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); if (memcg && !css_tryget(&memcg->css)) @@ -2574,6 +2444,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages); unlock_page_cgroup(pc); + WARN_ON_ONCE(PageLRU(page)); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. @@ -2585,44 +2456,29 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ - (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION)) + (1 << PCG_MIGRATION)) /* * Because tail pages are not marked as "used", set it. We're under - * zone->lru_lock, 'splitting on pmd' and compund_lock. + * zone->lru_lock, 'splitting on pmd' and compound_lock. + * charge/uncharge will be never happen and move_account() is done under + * compound_lock(), so we don't have to take care of races. */ -void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) +void mem_cgroup_split_huge_fixup(struct page *head) { struct page_cgroup *head_pc = lookup_page_cgroup(head); - struct page_cgroup *tail_pc = lookup_page_cgroup(tail); - unsigned long flags; + struct page_cgroup *pc; + int i; if (mem_cgroup_disabled()) return; - /* - * We have no races with charge/uncharge but will have races with - * page state accounting. - */ - move_lock_page_cgroup(head_pc, &flags); - - tail_pc->mem_cgroup = head_pc->mem_cgroup; - smp_wmb(); /* see __commit_charge() */ - if (PageCgroupAcctLRU(head_pc)) { - enum lru_list lru; - struct mem_cgroup_per_zone *mz; - - /* - * LRU flags cannot be copied because we need to add tail - *.page to LRU by generic call and our hook will be called. - * We hold lru_lock, then, reduce counter directly. - */ - lru = page_lru(head); - mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); - MEM_CGROUP_ZSTAT(mz, lru) -= 1; + for (i = 1; i < HPAGE_PMD_NR; i++) { + pc = head_pc + i; + pc->mem_cgroup = head_pc->mem_cgroup; + smp_wmb();/* see __commit_charge() */ + pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; } - tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; - move_unlock_page_cgroup(head_pc, &flags); } -#endif +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /** * mem_cgroup_move_account - move account of the page @@ -2737,7 +2593,7 @@ static int mem_cgroup_move_parent(struct page *page, parent = mem_cgroup_from_cont(pcg); ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); - if (ret || !parent) + if (ret) goto put_back; if (nr_pages > 1) @@ -2783,12 +2639,9 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, } pc = lookup_page_cgroup(page); - BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ - ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); - if (ret || !memcg) + if (ret == -ENOMEM) return ret; - __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype); return 0; } @@ -2798,19 +2651,11 @@ int mem_cgroup_newpage_charge(struct page *page, { if (mem_cgroup_disabled()) return 0; - /* - * If already mapped, we don't have to account. - * If page cache, page->mapping has address_space. - * But page->mapping may have out-of-use anon_vma pointer, - * detecit it by PageAnon() check. newly-mapped-anon's page->mapping - * is NULL. - */ - if (page_mapped(page) || (page->mapping && !PageAnon(page))) - return 0; - if (unlikely(!mm)) - mm = &init_mm; + VM_BUG_ON(page_mapped(page)); + VM_BUG_ON(page->mapping && !PageAnon(page)); + VM_BUG_ON(!mm); return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED); + MEM_CGROUP_CHARGE_TYPE_MAPPED); } static void @@ -2822,14 +2667,27 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg, enum charge_type ctype) { struct page_cgroup *pc = lookup_page_cgroup(page); + struct zone *zone = page_zone(page); + unsigned long flags; + bool removed = false; + /* * In some case, SwapCache, FUSE(splice_buf->radixtree), the page * is already on LRU. It means the page may on some other page_cgroup's * LRU. Take care of it. */ - mem_cgroup_lru_del_before_commit(page); + spin_lock_irqsave(&zone->lru_lock, flags); + if (PageLRU(page)) { + del_page_from_lru_list(zone, page, page_lru(page)); + ClearPageLRU(page); + removed = true; + } __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); - mem_cgroup_lru_add_after_commit(page); + if (removed) { + add_page_to_lru_list(zone, page, page_lru(page)); + SetPageLRU(page); + } + spin_unlock_irqrestore(&zone->lru_lock, flags); return; } @@ -2837,6 +2695,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *memcg = NULL; + enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; int ret; if (mem_cgroup_disabled()) @@ -2846,31 +2705,16 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, if (unlikely(!mm)) mm = &init_mm; + if (!page_is_file_cache(page)) + type = MEM_CGROUP_CHARGE_TYPE_SHMEM; - if (page_is_file_cache(page)) { - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true); - if (ret || !memcg) - return ret; - - /* - * FUSE reuses pages without going through the final - * put that would remove them from the LRU list, make - * sure that they get relinked properly. - */ - __mem_cgroup_commit_charge_lrucare(page, memcg, - MEM_CGROUP_CHARGE_TYPE_CACHE); - return ret; - } - /* shmem */ - if (PageSwapCache(page)) { + if (!PageSwapCache(page)) + ret = mem_cgroup_charge_common(page, mm, gfp_mask, type); + else { /* page is swapcache/shmem */ ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg); if (!ret) - __mem_cgroup_commit_charge_swapin(page, memcg, - MEM_CGROUP_CHARGE_TYPE_SHMEM); - } else - ret = mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_SHMEM); - + __mem_cgroup_commit_charge_swapin(page, memcg, type); + } return ret; } @@ -2882,12 +2726,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, */ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, - gfp_t mask, struct mem_cgroup **ptr) + gfp_t mask, struct mem_cgroup **memcgp) { struct mem_cgroup *memcg; int ret; - *ptr = NULL; + *memcgp = NULL; if (mem_cgroup_disabled()) return 0; @@ -2905,27 +2749,32 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, memcg = try_get_mem_cgroup_from_page(page); if (!memcg) goto charge_cur_mm; - *ptr = memcg; - ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); + *memcgp = memcg; + ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true); css_put(&memcg->css); + if (ret == -EINTR) + ret = 0; return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, 1, ptr, true); + ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); + if (ret == -EINTR) + ret = 0; + return ret; } static void -__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, +__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, enum charge_type ctype) { if (mem_cgroup_disabled()) return; - if (!ptr) + if (!memcg) return; - cgroup_exclude_rmdir(&ptr->css); + cgroup_exclude_rmdir(&memcg->css); - __mem_cgroup_commit_charge_lrucare(page, ptr, ctype); + __mem_cgroup_commit_charge_lrucare(page, memcg, ctype); /* * Now swap is on-memory. This means this page may be * counted both as mem and swap....double count. @@ -2935,21 +2784,22 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, */ if (do_swap_account && PageSwapCache(page)) { swp_entry_t ent = {.val = page_private(page)}; + struct mem_cgroup *swap_memcg; unsigned short id; - struct mem_cgroup *memcg; id = swap_cgroup_record(ent, 0); rcu_read_lock(); - memcg = mem_cgroup_lookup(id); - if (memcg) { + swap_memcg = mem_cgroup_lookup(id); + if (swap_memcg) { /* * This recorded memcg can be obsolete one. So, avoid * calling css_tryget */ - if (!mem_cgroup_is_root(memcg)) - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); - mem_cgroup_swap_statistics(memcg, false); - mem_cgroup_put(memcg); + if (!mem_cgroup_is_root(swap_memcg)) + res_counter_uncharge(&swap_memcg->memsw, + PAGE_SIZE); + mem_cgroup_swap_statistics(swap_memcg, false); + mem_cgroup_put(swap_memcg); } rcu_read_unlock(); } @@ -2958,13 +2808,14 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, * So, rmdir()->pre_destroy() can be called while we do this charge. * In that case, we need to call pre_destroy() again. check it here. */ - cgroup_release_and_wakeup_rmdir(&ptr->css); + cgroup_release_and_wakeup_rmdir(&memcg->css); } -void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) +void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *memcg) { - __mem_cgroup_commit_charge_swapin(page, ptr, - MEM_CGROUP_CHARGE_TYPE_MAPPED); + __mem_cgroup_commit_charge_swapin(page, memcg, + MEM_CGROUP_CHARGE_TYPE_MAPPED); } void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) @@ -3054,7 +2905,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) * Check if our page_cgroup is valid */ pc = lookup_page_cgroup(page); - if (unlikely(!pc || !PageCgroupUsed(pc))) + if (unlikely(!PageCgroupUsed(pc))) return NULL; lock_page_cgroup(pc); @@ -3117,8 +2968,7 @@ void mem_cgroup_uncharge_page(struct page *page) /* early check. */ if (page_mapped(page)) return; - if (page->mapping && !PageAnon(page)) - return; + VM_BUG_ON(page->mapping && !PageAnon(page)); __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); } @@ -3176,6 +3026,23 @@ void mem_cgroup_uncharge_end(void) batch->memcg = NULL; } +/* + * A function for resetting pc->mem_cgroup for newly allocated pages. + * This function should be called if the newpage will be added to LRU + * before start accounting. + */ +void mem_cgroup_reset_owner(struct page *newpage) +{ + struct page_cgroup *pc; + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(newpage); + VM_BUG_ON(PageCgroupUsed(pc)); + pc->mem_cgroup = root_mem_cgroup; +} + #ifdef CONFIG_SWAP /* * called after __delete_from_swap_cache() and drop "page" account. @@ -3293,14 +3160,14 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, * page belongs to. */ int mem_cgroup_prepare_migration(struct page *page, - struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) + struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask) { struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; enum charge_type ctype; int ret = 0; - *ptr = NULL; + *memcgp = NULL; VM_BUG_ON(PageTransHuge(page)); if (mem_cgroup_disabled()) @@ -3351,10 +3218,10 @@ int mem_cgroup_prepare_migration(struct page *page, if (!memcg) return 0; - *ptr = memcg; - ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); + *memcgp = memcg; + ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false); css_put(&memcg->css);/* drop extra refcnt */ - if (ret || *ptr == NULL) { + if (ret) { if (PageAnon(page)) { lock_page_cgroup(pc); ClearPageCgroupMigration(pc); @@ -3364,6 +3231,7 @@ int mem_cgroup_prepare_migration(struct page *page, */ mem_cgroup_uncharge_page(page); } + /* we'll need to revisit this error code (we have -EINTR) */ return -ENOMEM; } /* @@ -3432,12 +3300,51 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, cgroup_release_and_wakeup_rmdir(&memcg->css); } +/* + * At replace page cache, newpage is not under any memcg but it's on + * LRU. So, this function doesn't touch res_counter but handles LRU + * in correct way. Both pages are locked so we cannot race with uncharge. + */ +void mem_cgroup_replace_page_cache(struct page *oldpage, + struct page *newpage) +{ + struct mem_cgroup *memcg; + struct page_cgroup *pc; + enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(oldpage); + /* fix accounting on old pages */ + lock_page_cgroup(pc); + memcg = pc->mem_cgroup; + mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1); + ClearPageCgroupUsed(pc); + unlock_page_cgroup(pc); + + if (PageSwapBacked(oldpage)) + type = MEM_CGROUP_CHARGE_TYPE_SHMEM; + + /* + * Even if newpage->mapping was NULL before starting replacement, + * the newpage may be on LRU(or pagevec for LRU) already. We lock + * LRU while we overwrite pc->mem_cgroup. + */ + __mem_cgroup_commit_charge_lrucare(newpage, memcg, type); +} + #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { struct page_cgroup *pc; pc = lookup_page_cgroup(page); + /* + * Can be NULL while feeding pages into the page allocator for + * the first time, i.e. during boot or memory hotplug; + * or when mem_cgroup_disabled(). + */ if (likely(pc) && PageCgroupUsed(pc)) return pc; return NULL; @@ -3457,23 +3364,8 @@ void mem_cgroup_print_bad_page(struct page *page) pc = lookup_page_cgroup_used(page); if (pc) { - int ret = -1; - char *path; - - printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p", + printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p\n", pc, pc->flags, pc->mem_cgroup); - - path = kmalloc(PATH_MAX, GFP_KERNEL); - if (path) { - rcu_read_lock(); - ret = cgroup_path(pc->mem_cgroup->css.cgroup, - path, PATH_MAX); - rcu_read_unlock(); - } - - printk(KERN_CONT "(%s)\n", - (ret < 0) ? "cannot get the path" : path); - kfree(path); } } #endif @@ -3534,9 +3426,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, if (!ret) break; - mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, - MEM_CGROUP_RECLAIM_SHRINK, - NULL); + mem_cgroup_reclaim(memcg, GFP_KERNEL, + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -3594,10 +3485,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, if (!ret) break; - mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, - MEM_CGROUP_RECLAIM_NOSWAP | - MEM_CGROUP_RECLAIM_SHRINK, - NULL); + mem_cgroup_reclaim(memcg, GFP_KERNEL, + MEM_CGROUP_RECLAIM_NOSWAP | + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -3640,10 +3530,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, break; nr_scanned = 0; - reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, - gfp_mask, - MEM_CGROUP_RECLAIM_SOFT, - &nr_scanned); + reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone, + gfp_mask, &nr_scanned); nr_reclaimed += reclaimed; *total_scanned += nr_scanned; spin_lock(&mctz->lock); @@ -3711,22 +3599,23 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, int node, int zid, enum lru_list lru) { - struct zone *zone; struct mem_cgroup_per_zone *mz; - struct page_cgroup *pc, *busy; unsigned long flags, loop; struct list_head *list; + struct page *busy; + struct zone *zone; int ret = 0; zone = &NODE_DATA(node)->node_zones[zid]; mz = mem_cgroup_zoneinfo(memcg, node, zid); - list = &mz->lists[lru]; + list = &mz->lruvec.lists[lru]; loop = MEM_CGROUP_ZSTAT(mz, lru); /* give some margin against EBUSY etc...*/ loop += 256; busy = NULL; while (loop--) { + struct page_cgroup *pc; struct page *page; ret = 0; @@ -3735,24 +3624,24 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, spin_unlock_irqrestore(&zone->lru_lock, flags); break; } - pc = list_entry(list->prev, struct page_cgroup, lru); - if (busy == pc) { - list_move(&pc->lru, list); + page = list_entry(list->prev, struct page, lru); + if (busy == page) { + list_move(&page->lru, list); busy = NULL; spin_unlock_irqrestore(&zone->lru_lock, flags); continue; } spin_unlock_irqrestore(&zone->lru_lock, flags); - page = lookup_cgroup_page(pc); + pc = lookup_page_cgroup(page); ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL); - if (ret == -ENOMEM) + if (ret == -ENOMEM || ret == -EINTR) break; if (ret == -EBUSY || ret == -EINVAL) { /* found lock contention or "pc" is obsolete. */ - busy = pc; + busy = page; cond_resched(); } else busy = NULL; @@ -4846,7 +4735,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; for_each_lru(l) - INIT_LIST_HEAD(&mz->lists[l]); + INIT_LIST_HEAD(&mz->lruvec.lists[l]); mz->usage_in_excess = 0; mz->on_tree = false; mz->mem = memcg; @@ -4906,7 +4795,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) mem_cgroup_remove_from_trees(memcg); free_css_id(&mem_cgroup_subsys, &memcg->css); - for_each_node_state(node, N_POSSIBLE) + for_each_node(node) free_mem_cgroup_per_zone_info(memcg, node); free_percpu(memcg->stat); @@ -4965,13 +4854,13 @@ static int mem_cgroup_soft_limit_tree_init(void) struct mem_cgroup_tree_per_zone *rtpz; int tmp, node, zone; - for_each_node_state(node, N_POSSIBLE) { + for_each_node(node) { tmp = node; if (!node_state(node, N_NORMAL_MEMORY)) tmp = -1; rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); if (!rtpn) - return 1; + goto err_cleanup; soft_limit_tree.rb_tree_per_node[node] = rtpn; @@ -4982,6 +4871,16 @@ static int mem_cgroup_soft_limit_tree_init(void) } } return 0; + +err_cleanup: + for_each_node(node) { + if (!soft_limit_tree.rb_tree_per_node[node]) + break; + kfree(soft_limit_tree.rb_tree_per_node[node]); + soft_limit_tree.rb_tree_per_node[node] = NULL; + } + return 1; + } static struct cgroup_subsys_state * __ref @@ -4995,7 +4894,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) if (!memcg) return ERR_PTR(error); - for_each_node_state(node, N_POSSIBLE) + for_each_node(node) if (alloc_mem_cgroup_per_zone_info(memcg, node)) goto free_out; @@ -5033,7 +4932,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) res_counter_init(&memcg->res, NULL); res_counter_init(&memcg->memsw, NULL); } - memcg->last_scanned_child = 0; memcg->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&memcg->oom_notify); @@ -5129,9 +5027,9 @@ one_by_one: } ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &memcg, false); - if (ret || !memcg) + if (ret) /* mem_cgroup_clear_mc() will do uncharge later */ - return -ENOMEM; + return ret; mc.precharge++; } return ret; @@ -5276,7 +5174,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma, } /* There is a swap entry and a page doesn't exist or isn't charged */ if (ent.val && !ret && - css_id(&mc.from->css) == lookup_swap_cgroup(ent)) { + css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) { ret = MC_TARGET_SWAP; if (target) target->ent = ent; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 06d3479513a..56080ea3614 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags) page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, - 0, true); + 0, MIGRATE_SYNC); if (ret) { putback_lru_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", diff --git a/mm/memory.c b/mm/memory.c index 829d4373540..5e30583c260 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -293,7 +293,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { struct mmu_gather_batch *batch; - tlb->need_flush = 1; + VM_BUG_ON(!tlb->need_flush); if (tlb_fast_mode(tlb)) { free_page_and_swap_cache(page); @@ -1231,7 +1231,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, if (next-addr != HPAGE_PMD_SIZE) { VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); split_huge_page_pmd(vma->vm_mm, pmd); - } else if (zap_huge_pmd(tlb, vma, pmd)) + } else if (zap_huge_pmd(tlb, vma, pmd, addr)) continue; /* fall through */ } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2168489c0bc..6629fafd6ce 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) } /* this function returns # of failed pages */ ret = migrate_pages(&source, hotremove_migrate_alloc, 0, - true, true); + true, MIGRATE_SYNC); if (ret) putback_lru_pages(&source); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e3d58f08846..06b145fb64a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -942,7 +942,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_node_page, dest, - false, true); + false, MIGRATE_SYNC); if (err) putback_lru_pages(&pagelist); } diff --git a/mm/migrate.c b/mm/migrate.c index 89ea0854332..9871a56d82c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -216,6 +216,56 @@ out: pte_unmap_unlock(ptep, ptl); } +#ifdef CONFIG_BLOCK +/* Returns true if all buffers are successfully locked */ +static bool buffer_migrate_lock_buffers(struct buffer_head *head, + enum migrate_mode mode) +{ + struct buffer_head *bh = head; + + /* Simple case, sync compaction */ + if (mode != MIGRATE_ASYNC) { + do { + get_bh(bh); + lock_buffer(bh); + bh = bh->b_this_page; + + } while (bh != head); + + return true; + } + + /* async case, we cannot block on lock_buffer so use trylock_buffer */ + do { + get_bh(bh); + if (!trylock_buffer(bh)) { + /* + * We failed to lock the buffer and cannot stall in + * async migration. Release the taken locks + */ + struct buffer_head *failed_bh = bh; + put_bh(failed_bh); + bh = head; + while (bh != failed_bh) { + unlock_buffer(bh); + put_bh(bh); + bh = bh->b_this_page; + } + return false; + } + + bh = bh->b_this_page; + } while (bh != head); + return true; +} +#else +static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, + enum migrate_mode mode) +{ + return true; +} +#endif /* CONFIG_BLOCK */ + /* * Replace the page in the mapping. * @@ -225,7 +275,8 @@ out: * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ static int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + struct buffer_head *head, enum migrate_mode mode) { int expected_count; void **pslot; @@ -255,6 +306,20 @@ static int migrate_page_move_mapping(struct address_space *mapping, } /* + * In the async migration case of moving a page with buffers, lock the + * buffers using trylock before the mapping is moved. If the mapping + * was moved, we later failed to lock the buffers and could not move + * the mapping back due to an elevated page count, we would have to + * block waiting on other references to be dropped. + */ + if (mode == MIGRATE_ASYNC && head && + !buffer_migrate_lock_buffers(head, mode)) { + page_unfreeze_refs(page, expected_count); + spin_unlock_irq(&mapping->tree_lock); + return -EAGAIN; + } + + /* * Now we know that no one else is looking at the page. */ get_page(newpage); /* add cache reference */ @@ -409,13 +474,14 @@ EXPORT_SYMBOL(fail_migrate_page); * Pages are locked upon entry and exit. */ int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page); + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); if (rc) return rc; @@ -432,28 +498,28 @@ EXPORT_SYMBOL(migrate_page); * exist. */ int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, enum migrate_mode mode) { struct buffer_head *bh, *head; int rc; if (!page_has_buffers(page)) - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); head = page_buffers(page); - rc = migrate_page_move_mapping(mapping, newpage, page); + rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); if (rc) return rc; - bh = head; - do { - get_bh(bh); - lock_buffer(bh); - bh = bh->b_this_page; - - } while (bh != head); + /* + * In the async case, migrate_page_move_mapping locked the buffers + * with an IRQ-safe spinlock held. In the sync case, the buffers + * need to be locked now + */ + if (mode != MIGRATE_ASYNC) + BUG_ON(!buffer_migrate_lock_buffers(head, mode)); ClearPagePrivate(page); set_page_private(newpage, page_private(page)); @@ -530,10 +596,14 @@ static int writeout(struct address_space *mapping, struct page *page) * Default handling if a filesystem does not provide a migration function. */ static int fallback_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, enum migrate_mode mode) { - if (PageDirty(page)) + if (PageDirty(page)) { + /* Only writeback pages in full synchronous migration */ + if (mode != MIGRATE_SYNC) + return -EBUSY; return writeout(mapping, page); + } /* * Buffers may be managed in a filesystem specific way. @@ -543,7 +613,7 @@ static int fallback_migrate_page(struct address_space *mapping, !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } /* @@ -558,7 +628,7 @@ static int fallback_migrate_page(struct address_space *mapping, * == 0 - success */ static int move_to_new_page(struct page *newpage, struct page *page, - int remap_swapcache, bool sync) + int remap_swapcache, enum migrate_mode mode) { struct address_space *mapping; int rc; @@ -579,29 +649,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, mapping = page_mapping(page); if (!mapping) - rc = migrate_page(mapping, newpage, page); - else { + rc = migrate_page(mapping, newpage, page, mode); + else if (mapping->a_ops->migratepage) /* - * Do not writeback pages if !sync and migratepage is - * not pointing to migrate_page() which is nonblocking - * (swapcache/tmpfs uses migratepage = migrate_page). + * Most pages have a mapping and most filesystems provide a + * migratepage callback. Anonymous pages are part of swap + * space which also has its own migratepage callback. This + * is the most common path for page migration. */ - if (PageDirty(page) && !sync && - mapping->a_ops->migratepage != migrate_page) - rc = -EBUSY; - else if (mapping->a_ops->migratepage) - /* - * Most pages have a mapping and most filesystems - * should provide a migration function. Anonymous - * pages are part of swap space which also has its - * own migration function. This is the most common - * path for page migration. - */ - rc = mapping->a_ops->migratepage(mapping, - newpage, page); - else - rc = fallback_migrate_page(mapping, newpage, page); - } + rc = mapping->a_ops->migratepage(mapping, + newpage, page, mode); + else + rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc) { newpage->mapping = NULL; @@ -616,7 +675,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, } static int __unmap_and_move(struct page *page, struct page *newpage, - int force, bool offlining, bool sync) + int force, bool offlining, enum migrate_mode mode) { int rc = -EAGAIN; int remap_swapcache = 1; @@ -625,7 +684,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, struct anon_vma *anon_vma = NULL; if (!trylock_page(page)) { - if (!force || !sync) + if (!force || mode == MIGRATE_ASYNC) goto out; /* @@ -671,10 +730,12 @@ static int __unmap_and_move(struct page *page, struct page *newpage, if (PageWriteback(page)) { /* - * For !sync, there is no point retrying as the retry loop - * is expected to be too short for PageWriteback to be cleared + * Only in the case of a full syncronous migration is it + * necessary to wait for PageWriteback. In the async case, + * the retry loop is too short and in the sync-light case, + * the overhead of stalling is too much */ - if (!sync) { + if (mode != MIGRATE_SYNC) { rc = -EBUSY; goto uncharge; } @@ -745,7 +806,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, skip_unmap: if (!page_mapped(page)) - rc = move_to_new_page(newpage, page, remap_swapcache, sync); + rc = move_to_new_page(newpage, page, remap_swapcache, mode); if (rc && remap_swapcache) remove_migration_ptes(page, page); @@ -768,7 +829,8 @@ out: * to the newly allocated page in newpage. */ static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, bool sync) + struct page *page, int force, bool offlining, + enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -777,6 +839,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (!newpage) return -ENOMEM; + mem_cgroup_reset_owner(newpage); + if (page_count(page) == 1) { /* page was freed from under us. So we are done. */ goto out; @@ -786,7 +850,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (unlikely(split_huge_page(page))) goto out; - rc = __unmap_and_move(page, newpage, force, offlining, sync); + rc = __unmap_and_move(page, newpage, force, offlining, mode); out: if (rc != -EAGAIN) { /* @@ -834,7 +898,8 @@ out: */ static int unmap_and_move_huge_page(new_page_t get_new_page, unsigned long private, struct page *hpage, - int force, bool offlining, bool sync) + int force, bool offlining, + enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -847,7 +912,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, rc = -EAGAIN; if (!trylock_page(hpage)) { - if (!force || !sync) + if (!force || mode != MIGRATE_SYNC) goto out; lock_page(hpage); } @@ -858,7 +923,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); if (!page_mapped(hpage)) - rc = move_to_new_page(new_hpage, hpage, 1, sync); + rc = move_to_new_page(new_hpage, hpage, 1, mode); if (rc) remove_migration_ptes(hpage, hpage); @@ -901,7 +966,7 @@ out: */ int migrate_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, - bool sync) + enum migrate_mode mode) { int retry = 1; int nr_failed = 0; @@ -922,7 +987,7 @@ int migrate_pages(struct list_head *from, rc = unmap_and_move(get_new_page, private, page, pass > 2, offlining, - sync); + mode); switch(rc) { case -ENOMEM: @@ -952,7 +1017,7 @@ out: int migrate_huge_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, - bool sync) + enum migrate_mode mode) { int retry = 1; int nr_failed = 0; @@ -969,7 +1034,7 @@ int migrate_huge_pages(struct list_head *from, rc = unmap_and_move_huge_page(get_new_page, private, page, pass > 2, offlining, - sync); + mode); switch(rc) { case -ENOMEM: @@ -1098,7 +1163,7 @@ set_status: err = 0; if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_page_node, - (unsigned long)pm, 0, true); + (unsigned long)pm, 0, MIGRATE_SYNC); if (err) putback_lru_pages(&pagelist); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 7c122faa05c..2958fd8e7c9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -152,7 +152,7 @@ struct task_struct *find_lock_task_mm(struct task_struct *p) /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, - const struct mem_cgroup *mem, const nodemask_t *nodemask) + const struct mem_cgroup *memcg, const nodemask_t *nodemask) { if (is_global_init(p)) return true; @@ -160,7 +160,7 @@ static bool oom_unkillable_task(struct task_struct *p, return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ - if (mem && !task_in_mem_cgroup(p, mem)) + if (memcg && !task_in_mem_cgroup(p, memcg)) return true; /* p may not have freeable memory in nodemask */ @@ -179,12 +179,12 @@ static bool oom_unkillable_task(struct task_struct *p, * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ -unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, +unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) { long points; - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) return 0; p = find_lock_task_mm(p); @@ -308,7 +308,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist, * (not docbooked, we don't want this one cluttering up the manual) */ static struct task_struct *select_bad_process(unsigned int *ppoints, - unsigned long totalpages, struct mem_cgroup *mem, + unsigned long totalpages, struct mem_cgroup *memcg, const nodemask_t *nodemask) { struct task_struct *g, *p; @@ -320,7 +320,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, if (p->exit_state) continue; - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) continue; /* @@ -364,7 +364,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, } } - points = oom_badness(p, mem, nodemask, totalpages); + points = oom_badness(p, memcg, nodemask, totalpages); if (points > *ppoints) { chosen = p; *ppoints = points; @@ -387,14 +387,14 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, * * Call with tasklist_lock read-locked. */ -static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) +static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) { struct task_struct *p; struct task_struct *task; pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); for_each_process(p) { - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) continue; task = find_lock_task_mm(p); @@ -417,7 +417,7 @@ static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) } static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, - struct mem_cgroup *mem, const nodemask_t *nodemask) + struct mem_cgroup *memcg, const nodemask_t *nodemask) { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " @@ -427,14 +427,14 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); - mem_cgroup_print_oom_info(mem, p); + mem_cgroup_print_oom_info(memcg, p); show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) - dump_tasks(mem, nodemask); + dump_tasks(memcg, nodemask); } #define K(x) ((x) << (PAGE_SHIFT-10)) -static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) +static int oom_kill_task(struct task_struct *p) { struct task_struct *q; struct mm_struct *mm; @@ -484,7 +484,7 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, - struct mem_cgroup *mem, nodemask_t *nodemask, + struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message) { struct task_struct *victim = p; @@ -493,7 +493,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int victim_points = 0; if (printk_ratelimit()) - dump_header(p, gfp_mask, order, mem, nodemask); + dump_header(p, gfp_mask, order, memcg, nodemask); /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -524,7 +524,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* * oom_badness() returns 0 if the thread is unkillable */ - child_points = oom_badness(child, mem, nodemask, + child_points = oom_badness(child, memcg, nodemask, totalpages); if (child_points > victim_points) { victim = child; @@ -533,7 +533,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, } } while_each_thread(p, t); - return oom_kill_task(victim, mem); + return oom_kill_task(victim); } /* @@ -561,7 +561,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) +void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask) { unsigned long limit; unsigned int points = 0; @@ -578,14 +578,14 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) } check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); - limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; + limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT; read_lock(&tasklist_lock); retry: - p = select_bad_process(&points, limit, mem, NULL); + p = select_bad_process(&points, limit, memcg, NULL); if (!p || PTR_ERR(p) == -1UL) goto out; - if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL, + if (oom_kill_process(p, gfp_mask, 0, points, limit, memcg, NULL, "Memory cgroup out of memory")) goto retry; out: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 794e6715c22..0027d8f4a1b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1981,14 +1981,20 @@ static struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress, - bool sync_migration) + int migratetype, bool sync_migration, + bool *deferred_compaction, + unsigned long *did_some_progress) { struct page *page; - if (!order || compaction_deferred(preferred_zone)) + if (!order) return NULL; + if (compaction_deferred(preferred_zone)) { + *deferred_compaction = true; + return NULL; + } + current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync_migration); @@ -2016,7 +2022,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, * but not enough to satisfy watermarks. */ count_vm_event(COMPACTFAIL); - defer_compaction(preferred_zone); + + /* + * As async compaction considers a subset of pageblocks, only + * defer if the failure was a sync compaction failure. + */ + if (sync_migration) + defer_compaction(preferred_zone); cond_resched(); } @@ -2028,8 +2040,9 @@ static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress, - bool sync_migration) + int migratetype, bool sync_migration, + bool *deferred_compaction, + unsigned long *did_some_progress) { return NULL; } @@ -2179,6 +2192,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned long pages_reclaimed = 0; unsigned long did_some_progress; bool sync_migration = false; + bool deferred_compaction = false; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -2259,12 +2273,22 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress, - sync_migration); + migratetype, sync_migration, + &deferred_compaction, + &did_some_progress); if (page) goto got_pg; sync_migration = true; + /* + * If compaction is deferred for high-order allocations, it is because + * sync compaction recently failed. In this is the case and the caller + * has requested the system not be heavily disrupted, fail the + * allocation now instead of entering direct reclaim + */ + if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) + goto nopage; + /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, zonelist, high_zoneidx, @@ -2328,8 +2352,9 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress, - sync_migration); + migratetype, sync_migration, + &deferred_compaction, + &did_some_progress); if (page) goto got_pg; } @@ -4237,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; - enum lru_list l; + enum lru_list lru; size = zone_spanned_pages_in_node(nid, j, zones_size); realsize = size - zone_absent_pages_in_node(nid, j, @@ -4287,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone_pcp_init(zone); - for_each_lru(l) - INIT_LIST_HEAD(&zone->lru[l].list); + for_each_lru(lru) + INIT_LIST_HEAD(&zone->lruvec.lists[lru]); zone->reclaim_stat.recent_rotated[0] = 0; zone->reclaim_stat.recent_rotated[1] = 0; zone->reclaim_stat.recent_scanned[0] = 0; @@ -4642,8 +4667,10 @@ static void check_for_regular_memory(pg_data_t *pgdat) for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; - if (zone->present_pages) + if (zone->present_pages) { node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); + break; + } } #endif } diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 2d123f94a8d..de1616aa9b1 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -11,13 +11,6 @@ #include <linux/swapops.h> #include <linux/kmemleak.h> -static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) -{ - pc->flags = 0; - set_page_cgroup_array_id(pc, id); - pc->mem_cgroup = NULL; - INIT_LIST_HEAD(&pc->lru); -} static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) @@ -35,35 +28,27 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) struct page_cgroup *base; base = NODE_DATA(page_to_nid(page))->node_page_cgroup; +#ifdef CONFIG_DEBUG_VM + /* + * The sanity checks the page allocator does upon freeing a + * page can reach here before the page_cgroup arrays are + * allocated when feeding a range of pages to the allocator + * for the first time during bootup or memory hotplug. + */ if (unlikely(!base)) return NULL; - +#endif offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn; return base + offset; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - unsigned long pfn; - struct page *page; - pg_data_t *pgdat; - - pgdat = NODE_DATA(page_cgroup_array_id(pc)); - pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; - page = pfn_to_page(pfn); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static int __init alloc_node_page_cgroup(int nid) { - struct page_cgroup *base, *pc; + struct page_cgroup *base; unsigned long table_size; - unsigned long start_pfn, nr_pages, index; + unsigned long nr_pages; - start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; - if (!nr_pages) return 0; @@ -73,10 +58,6 @@ static int __init alloc_node_page_cgroup(int nid) table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) return -ENOMEM; - for (index = 0; index < nr_pages; index++) { - pc = base + index; - init_page_cgroup(pc, nid); - } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; return 0; @@ -111,29 +92,23 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); - +#ifdef CONFIG_DEBUG_VM + /* + * The sanity checks the page allocator does upon freeing a + * page can reach here before the page_cgroup arrays are + * allocated when feeding a range of pages to the allocator + * for the first time during bootup or memory hotplug. + */ if (!section->page_cgroup) return NULL; +#endif return section->page_cgroup + pfn; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - struct mem_section *section; - struct page *page; - unsigned long nr; - - nr = page_cgroup_array_id(pc); - section = __nr_to_section(nr); - page = pfn_to_page(pc - section->page_cgroup); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static void *__meminit alloc_page_cgroup(size_t size, int nid) { + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; - gfp_t flags = GFP_KERNEL | __GFP_NOWARN; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) { @@ -142,39 +117,20 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid) } if (node_state(nid, N_HIGH_MEMORY)) - addr = vmalloc_node(size, nid); + addr = vzalloc_node(size, nid); else - addr = vmalloc(size); + addr = vzalloc(size); return addr; } -#ifdef CONFIG_MEMORY_HOTPLUG -static void free_page_cgroup(void *addr) -{ - if (is_vmalloc_addr(addr)) { - vfree(addr); - } else { - struct page *page = virt_to_page(addr); - size_t table_size = - sizeof(struct page_cgroup) * PAGES_PER_SECTION; - - BUG_ON(PageReserved(page)); - free_pages_exact(addr, table_size); - } -} -#endif - static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) { - struct page_cgroup *base, *pc; struct mem_section *section; + struct page_cgroup *base; unsigned long table_size; - unsigned long nr; - int index; - nr = pfn_to_section_nr(pfn); - section = __nr_to_section(nr); + section = __pfn_to_section(pfn); if (section->page_cgroup) return 0; @@ -194,10 +150,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) return -ENOMEM; } - for (index = 0; index < PAGES_PER_SECTION; index++) { - pc = base + index; - init_page_cgroup(pc, nr); - } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. @@ -208,6 +160,20 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) return 0; } #ifdef CONFIG_MEMORY_HOTPLUG +static void free_page_cgroup(void *addr) +{ + if (is_vmalloc_addr(addr)) { + vfree(addr); + } else { + struct page *page = virt_to_page(addr); + size_t table_size = + sizeof(struct page_cgroup) * PAGES_PER_SECTION; + + BUG_ON(PageReserved(page)); + free_pages_exact(addr, table_size); + } +} + void __free_page_cgroup(unsigned long pfn) { struct mem_section *ms; @@ -366,7 +332,6 @@ struct swap_cgroup { unsigned short id; }; #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) -#define SC_POS_MASK (SC_PER_PAGE - 1) /* * SwapCgroup implements "lookup" and "exchange" operations. @@ -408,6 +373,21 @@ not_enough_page: return -ENOMEM; } +static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, + struct swap_cgroup_ctrl **ctrlp) +{ + pgoff_t offset = swp_offset(ent); + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; + if (ctrlp) + *ctrlp = ctrl; + + mappage = ctrl->map[offset / SC_PER_PAGE]; + return page_address(mappage) + offset % SC_PER_PAGE; +} + /** * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. * @end: swap entry to be cmpxchged @@ -420,21 +400,13 @@ not_enough_page: unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned long flags; unsigned short retval; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); retval = sc->id; if (retval == old) @@ -455,21 +427,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, */ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned short old; unsigned long flags; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); old = sc->id; sc->id = id; @@ -479,28 +443,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } /** - * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry + * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry * @ent: swap entry to be looked up. * * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) */ -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; - struct swap_cgroup_ctrl *ctrl; - struct page *mappage; - struct swap_cgroup *sc; - unsigned short ret; - - ctrl = &swap_cgroup_ctrl[type]; - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; - ret = sc->id; - return ret; + return lookup_swap_cgroup(ent, NULL)->id; } int swap_cgroup_swapon(int type, unsigned long max_pages) diff --git a/mm/rmap.c b/mm/rmap.c index a2e5ce1fa08..c8454e06b6c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -773,7 +773,7 @@ out: } static int page_referenced_anon(struct page *page, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { unsigned int mapcount; @@ -796,7 +796,7 @@ static int page_referenced_anon(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) continue; referenced += page_referenced_one(page, vma, address, &mapcount, vm_flags); @@ -811,7 +811,7 @@ static int page_referenced_anon(struct page *page, /** * page_referenced_file - referenced check for object-based rmap * @page: the page we're checking references on. - * @mem_cont: target memory controller + * @memcg: target memory control group * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * * For an object-based mapped page, find all the places it is mapped and @@ -822,7 +822,7 @@ static int page_referenced_anon(struct page *page, * This function is only called from page_referenced for object-based pages. */ static int page_referenced_file(struct page *page, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { unsigned int mapcount; @@ -864,7 +864,7 @@ static int page_referenced_file(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) continue; referenced += page_referenced_one(page, vma, address, &mapcount, vm_flags); @@ -880,7 +880,7 @@ static int page_referenced_file(struct page *page, * page_referenced - test if the page was referenced * @page: the page to test * @is_locked: caller holds lock on the page - * @mem_cont: target memory controller + * @memcg: target memory cgroup * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * * Quick test_and_clear_referenced for all mappings to a page, @@ -888,7 +888,7 @@ static int page_referenced_file(struct page *page, */ int page_referenced(struct page *page, int is_locked, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { int referenced = 0; @@ -904,13 +904,13 @@ int page_referenced(struct page *page, } } if (unlikely(PageKsm(page))) - referenced += page_referenced_ksm(page, mem_cont, + referenced += page_referenced_ksm(page, memcg, vm_flags); else if (PageAnon(page)) - referenced += page_referenced_anon(page, mem_cont, + referenced += page_referenced_anon(page, memcg, vm_flags); else if (page->mapping) - referenced += page_referenced_file(page, mem_cont, + referenced += page_referenced_file(page, memcg, vm_flags); if (we_locked) unlock_page(page); diff --git a/mm/slub.c b/mm/slub.c index 5d37b5e4414..4907563ef7f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -366,7 +366,8 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page const char *n) { VM_BUG_ON(!irqs_disabled()); -#ifdef CONFIG_CMPXCHG_DOUBLE +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, @@ -400,7 +401,8 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, void *freelist_new, unsigned long counters_new, const char *n) { -#ifdef CONFIG_CMPXCHG_DOUBLE +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, @@ -3014,7 +3016,8 @@ static int kmem_cache_open(struct kmem_cache *s, } } -#ifdef CONFIG_CMPXCHG_DOUBLE +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) /* Enable fast mode */ s->flags |= __CMPXCHG_DOUBLE; diff --git a/mm/swap.c b/mm/swap.c index 67a09a633a0..b0f529b3897 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -23,7 +23,6 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/mm_inline.h> -#include <linux/buffer_head.h> /* for try_to_release_page() */ #include <linux/percpu_counter.h> #include <linux/percpu.h> #include <linux/cpu.h> @@ -54,7 +53,7 @@ static void __page_cache_release(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru(zone, page); + del_page_from_lru_list(zone, page, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } } @@ -232,12 +231,14 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, void *arg) { int *pgmoved = arg; - struct zone *zone = page_zone(page); if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { enum lru_list lru = page_lru_base_type(page); - list_move_tail(&page->lru, &zone->lru[lru].list); - mem_cgroup_rotate_reclaimable_page(page); + struct lruvec *lruvec; + + lruvec = mem_cgroup_lru_move_lists(page_zone(page), + page, lru, lru); + list_move_tail(&page->lru, &lruvec->lists[lru]); (*pgmoved)++; } } @@ -368,7 +369,6 @@ void mark_page_accessed(struct page *page) SetPageReferenced(page); } } - EXPORT_SYMBOL(mark_page_accessed); void __lru_cache_add(struct page *page, enum lru_list lru) @@ -377,7 +377,7 @@ void __lru_cache_add(struct page *page, enum lru_list lru) page_cache_get(page); if (!pagevec_add(pvec, page)) - ____pagevec_lru_add(pvec, lru); + __pagevec_lru_add(pvec, lru); put_cpu_var(lru_add_pvecs); } EXPORT_SYMBOL(__lru_cache_add); @@ -476,12 +476,13 @@ static void lru_deactivate_fn(struct page *page, void *arg) */ SetPageReclaim(page); } else { + struct lruvec *lruvec; /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ - list_move_tail(&page->lru, &zone->lru[lru].list); - mem_cgroup_rotate_reclaimable_page(page); + lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru); + list_move_tail(&page->lru, &lruvec->lists[lru]); __count_vm_event(PGROTATED); } @@ -504,7 +505,7 @@ static void drain_cpu_pagevecs(int cpu) for_each_lru(lru) { pvec = &pvecs[lru - LRU_BASE]; if (pagevec_count(pvec)) - ____pagevec_lru_add(pvec, lru); + __pagevec_lru_add(pvec, lru); } pvec = &per_cpu(lru_rotate_pvecs, cpu); @@ -616,7 +617,7 @@ void release_pages(struct page **pages, int nr, int cold) } VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru(zone, page); + del_page_from_lru_list(zone, page, page_off_lru(page)); } list_add(&page->lru, &pages_to_free); @@ -644,9 +645,9 @@ void __pagevec_release(struct pagevec *pvec) release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); pagevec_reinit(pvec); } - EXPORT_SYMBOL(__pagevec_release); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* used by __split_huge_page_refcount() */ void lru_add_page_tail(struct zone* zone, struct page *page, struct page *page_tail) @@ -654,7 +655,6 @@ void lru_add_page_tail(struct zone* zone, int active; enum lru_list lru; const int file = 0; - struct list_head *head; VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); @@ -673,18 +673,30 @@ void lru_add_page_tail(struct zone* zone, lru = LRU_INACTIVE_ANON; } update_page_reclaim_stat(zone, page_tail, file, active); - if (likely(PageLRU(page))) - head = page->lru.prev; - else - head = &zone->lru[lru].list; - __add_page_to_lru_list(zone, page_tail, lru, head); } else { SetPageUnevictable(page_tail); - add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); + lru = LRU_UNEVICTABLE; + } + + if (likely(PageLRU(page))) + list_add_tail(&page_tail->lru, &page->lru); + else { + struct list_head *list_head; + /* + * Head page has not yet been counted, as an hpage, + * so we must account for each subpage individually. + * + * Use the standard add function to put page_tail on the list, + * but then correct its position so they all end up in order. + */ + add_page_to_lru_list(zone, page_tail, lru); + list_head = page_tail->lru.prev; + list_move_tail(&page_tail->lru, list_head); } } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static void ____pagevec_lru_add_fn(struct page *page, void *arg) +static void __pagevec_lru_add_fn(struct page *page, void *arg) { enum lru_list lru = (enum lru_list)arg; struct zone *zone = page_zone(page); @@ -706,32 +718,13 @@ static void ____pagevec_lru_add_fn(struct page *page, void *arg) * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ -void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) +void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { VM_BUG_ON(is_unevictable_lru(lru)); - pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru); -} - -EXPORT_SYMBOL(____pagevec_lru_add); - -/* - * Try to drop buffers from the pages in a pagevec - */ -void pagevec_strip(struct pagevec *pvec) -{ - int i; - - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - if (page_has_private(page) && trylock_page(page)) { - if (page_has_private(page)) - try_to_release_page(page, 0); - unlock_page(page); - } - } + pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru); } +EXPORT_SYMBOL(__pagevec_lru_add); /** * pagevec_lookup - gang pagecache lookup @@ -755,7 +748,6 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages); return pagevec_count(pvec); } - EXPORT_SYMBOL(pagevec_lookup); unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, @@ -765,7 +757,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, nr_pages, pvec->pages); return pagevec_count(pvec); } - EXPORT_SYMBOL(pagevec_lookup_tag); /* diff --git a/mm/swap_state.c b/mm/swap_state.c index ea6b32d6187..470038a9187 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -300,6 +300,16 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, new_page = alloc_page_vma(gfp_mask, vma, addr); if (!new_page) break; /* Out of memory */ + /* + * The memcg-specific accounting when moving + * pages around the LRU lists relies on the + * page's owner (memcg) to be valid. Usually, + * pages are assigned to a new owner before + * being put on the LRU list, but since this + * is not the case here, the stale owner from + * a previous allocation cycle must be reset. + */ + mem_cgroup_reset_owner(new_page); } /* diff --git a/mm/swapfile.c b/mm/swapfile.c index 9520592d423..d999f090dfd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -847,12 +847,13 @@ unsigned int count_swap_pages(int type, int free) static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct page *page) { - struct mem_cgroup *ptr; + struct mem_cgroup *memcg; spinlock_t *ptl; pte_t *pte; int ret = 1; - if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) { + if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, + GFP_KERNEL, &memcg)) { ret = -ENOMEM; goto out_nolock; } @@ -860,7 +861,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { if (ret > 0) - mem_cgroup_cancel_charge_swapin(ptr); + mem_cgroup_cancel_charge_swapin(memcg); ret = 0; goto out; } @@ -871,7 +872,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, set_pte_at(vma->vm_mm, addr, pte, pte_mkold(mk_pte(page, vma->vm_page_prot))); page_add_anon_rmap(page, vma, addr); - mem_cgroup_commit_charge_swapin(page, ptr); + mem_cgroup_commit_charge_swapin(page, memcg); swap_free(entry); /* * Move the page to the active list so it is not diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 877ca046f43..86ce9a526c1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2378,7 +2378,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL); vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL); if (!vas || !vms) - goto err_free; + goto err_free2; for (area = 0; area < nr_vms; area++) { vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL); @@ -2476,11 +2476,10 @@ found: err_free: for (area = 0; area < nr_vms; area++) { - if (vas) - kfree(vas[area]); - if (vms) - kfree(vms[area]); + kfree(vas[area]); + kfree(vms[area]); } +err_free2: kfree(vas); kfree(vms); return NULL; diff --git a/mm/vmscan.c b/mm/vmscan.c index 26f4a8a4e0c..2880396f795 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -103,8 +103,11 @@ struct scan_control { */ reclaim_mode_t reclaim_mode; - /* Which cgroup do we reclaim from */ - struct mem_cgroup *mem_cgroup; + /* + * The memory cgroup that hit its limit and as a result is the + * primary target of this reclaim invocation. + */ + struct mem_cgroup *target_mem_cgroup; /* * Nodemask of nodes allowed by the caller. If NULL, all nodes @@ -113,6 +116,11 @@ struct scan_control { nodemask_t *nodemask; }; +struct mem_cgroup_zone { + struct mem_cgroup *mem_cgroup; + struct zone *zone; +}; + #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -153,28 +161,45 @@ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); #ifdef CONFIG_CGROUP_MEM_RES_CTLR -#define scanning_global_lru(sc) (!(sc)->mem_cgroup) +static bool global_reclaim(struct scan_control *sc) +{ + return !sc->target_mem_cgroup; +} + +static bool scanning_global_lru(struct mem_cgroup_zone *mz) +{ + return !mz->mem_cgroup; +} #else -#define scanning_global_lru(sc) (1) +static bool global_reclaim(struct scan_control *sc) +{ + return true; +} + +static bool scanning_global_lru(struct mem_cgroup_zone *mz) +{ + return true; +} #endif -static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone, - struct scan_control *sc) +static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) { - if (!scanning_global_lru(sc)) - return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone); + if (!scanning_global_lru(mz)) + return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); - return &zone->reclaim_stat; + return &mz->zone->reclaim_stat; } -static unsigned long zone_nr_lru_pages(struct zone *zone, - struct scan_control *sc, enum lru_list lru) +static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, + enum lru_list lru) { - if (!scanning_global_lru(sc)) - return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup, - zone_to_nid(zone), zone_idx(zone), BIT(lru)); + if (!scanning_global_lru(mz)) + return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, + zone_to_nid(mz->zone), + zone_idx(mz->zone), + BIT(lru)); - return zone_page_state(zone, NR_LRU_BASE + lru); + return zone_page_state(mz->zone, NR_LRU_BASE + lru); } @@ -677,12 +702,13 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, + struct mem_cgroup_zone *mz, struct scan_control *sc) { int referenced_ptes, referenced_page; unsigned long vm_flags; - referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags); + referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); referenced_page = TestClearPageReferenced(page); /* Lumpy reclaim - ignore references */ @@ -738,7 +764,7 @@ static enum page_references page_check_references(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct zone *zone, + struct mem_cgroup_zone *mz, struct scan_control *sc, int priority, unsigned long *ret_nr_dirty, @@ -769,7 +795,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != zone); + VM_BUG_ON(page_zone(page) != mz->zone); sc->nr_scanned++; @@ -803,7 +829,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, } } - references = page_check_references(page, sc); + references = page_check_references(page, mz, sc); switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; @@ -994,8 +1020,8 @@ keep_lumpy: * back off and wait for congestion to clear because further reclaim * will encounter the same problem */ - if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc)) - zone_set_flag(zone, ZONE_CONGESTED); + if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) + zone_set_flag(mz->zone, ZONE_CONGESTED); free_hot_cold_page_list(&free_pages, 1); @@ -1049,8 +1075,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) ret = -EBUSY; - if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) - return ret; + /* + * To minimise LRU disruption, the caller can indicate that it only + * wants to isolate pages it will be able to operate on without + * blocking - clean pages for the most part. + * + * ISOLATE_CLEAN means that only clean pages should be isolated. This + * is used by reclaim when it is cannot write to backing storage + * + * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages + * that it is possible to migrate without blocking + */ + if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { + /* All the caller can do on PageWriteback is block */ + if (PageWriteback(page)) + return ret; + + if (PageDirty(page)) { + struct address_space *mapping; + + /* ISOLATE_CLEAN means only clean pages */ + if (mode & ISOLATE_CLEAN) + return ret; + + /* + * Only pages without mappings or that have a + * ->migratepage callback are possible to migrate + * without blocking + */ + mapping = page_mapping(page); + if (mapping && !mapping->a_ops->migratepage) + return ret; + } + } if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) return ret; @@ -1079,25 +1136,36 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) * Appropriate locks must be held before calling this function. * * @nr_to_scan: The number of pages to look through on the list. - * @src: The LRU list to pull pages off. + * @mz: The mem_cgroup_zone to pull pages from. * @dst: The temp list to put pages on to. - * @scanned: The number of pages that were scanned. + * @nr_scanned: The number of pages that were scanned. * @order: The caller's attempted allocation order * @mode: One of the LRU isolation modes + * @active: True [1] if isolating active pages * @file: True [1] if isolating file [!anon] pages * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, - struct list_head *src, struct list_head *dst, - unsigned long *scanned, int order, isolate_mode_t mode, - int file) + struct mem_cgroup_zone *mz, struct list_head *dst, + unsigned long *nr_scanned, int order, isolate_mode_t mode, + int active, int file) { + struct lruvec *lruvec; + struct list_head *src; unsigned long nr_taken = 0; unsigned long nr_lumpy_taken = 0; unsigned long nr_lumpy_dirty = 0; unsigned long nr_lumpy_failed = 0; unsigned long scan; + int lru = LRU_BASE; + + lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); + if (active) + lru += LRU_ACTIVE; + if (file) + lru += LRU_FILE; + src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; @@ -1113,15 +1181,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, switch (__isolate_lru_page(page, mode, file)) { case 0: + mem_cgroup_lru_del(page); list_move(&page->lru, dst); - mem_cgroup_del_lru(page); nr_taken += hpage_nr_pages(page); break; case -EBUSY: /* else it is being freed elsewhere */ list_move(&page->lru, src); - mem_cgroup_rotate_lru_list(page, page_lru(page)); continue; default: @@ -1171,13 +1238,17 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, break; if (__isolate_lru_page(cursor_page, mode, file) == 0) { + unsigned int isolated_pages; + + mem_cgroup_lru_del(cursor_page); list_move(&cursor_page->lru, dst); - mem_cgroup_del_lru(cursor_page); - nr_taken += hpage_nr_pages(cursor_page); - nr_lumpy_taken++; + isolated_pages = hpage_nr_pages(cursor_page); + nr_taken += isolated_pages; + nr_lumpy_taken += isolated_pages; if (PageDirty(cursor_page)) - nr_lumpy_dirty++; + nr_lumpy_dirty += isolated_pages; scan++; + pfn += isolated_pages - 1; } else { /* * Check if the page is freed already. @@ -1203,57 +1274,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, nr_lumpy_failed++; } - *scanned = scan; + *nr_scanned = scan; trace_mm_vmscan_lru_isolate(order, nr_to_scan, scan, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, - mode); + mode, file); return nr_taken; } -static unsigned long isolate_pages_global(unsigned long nr, - struct list_head *dst, - unsigned long *scanned, int order, - isolate_mode_t mode, - struct zone *z, int active, int file) -{ - int lru = LRU_BASE; - if (active) - lru += LRU_ACTIVE; - if (file) - lru += LRU_FILE; - return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order, - mode, file); -} - -/* - * clear_active_flags() is a helper for shrink_active_list(), clearing - * any active bits from the pages in the list. - */ -static unsigned long clear_active_flags(struct list_head *page_list, - unsigned int *count) -{ - int nr_active = 0; - int lru; - struct page *page; - - list_for_each_entry(page, page_list, lru) { - int numpages = hpage_nr_pages(page); - lru = page_lru_base_type(page); - if (PageActive(page)) { - lru += LRU_ACTIVE; - ClearPageActive(page); - nr_active += numpages; - } - if (count) - count[lru] += numpages; - } - - return nr_active; -} - /** * isolate_lru_page - tries to isolate a page from its LRU list * @page: page to isolate from its LRU list @@ -1313,7 +1343,7 @@ static int too_many_isolated(struct zone *zone, int file, if (current_is_kswapd()) return 0; - if (!scanning_global_lru(sc)) + if (!global_reclaim(sc)) return 0; if (file) { @@ -1327,27 +1357,21 @@ static int too_many_isolated(struct zone *zone, int file, return isolated > inactive; } -/* - * TODO: Try merging with migrations version of putback_lru_pages - */ static noinline_for_stack void -putback_lru_pages(struct zone *zone, struct scan_control *sc, - unsigned long nr_anon, unsigned long nr_file, - struct list_head *page_list) +putback_inactive_pages(struct mem_cgroup_zone *mz, + struct list_head *page_list) { - struct page *page; - struct pagevec pvec; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); - - pagevec_init(&pvec, 1); + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone *zone = mz->zone; + LIST_HEAD(pages_to_free); /* * Put back any unfreeable pages. */ - spin_lock(&zone->lru_lock); while (!list_empty(page_list)) { + struct page *page = lru_to_page(page_list); int lru; - page = lru_to_page(page_list); + VM_BUG_ON(PageLRU(page)); list_del(&page->lru); if (unlikely(!page_evictable(page, NULL))) { @@ -1364,30 +1388,53 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, int numpages = hpage_nr_pages(page); reclaim_stat->recent_rotated[file] += numpages; } - if (!pagevec_add(&pvec, page)) { - spin_unlock_irq(&zone->lru_lock); - __pagevec_release(&pvec); - spin_lock_irq(&zone->lru_lock); + if (put_page_testzero(page)) { + __ClearPageLRU(page); + __ClearPageActive(page); + del_page_from_lru_list(zone, page, lru); + + if (unlikely(PageCompound(page))) { + spin_unlock_irq(&zone->lru_lock); + (*get_compound_page_dtor(page))(page); + spin_lock_irq(&zone->lru_lock); + } else + list_add(&page->lru, &pages_to_free); } } - __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); - spin_unlock_irq(&zone->lru_lock); - pagevec_release(&pvec); + /* + * To save our caller's stack, now use input list for pages to free. + */ + list_splice(&pages_to_free, page_list); } -static noinline_for_stack void update_isolated_counts(struct zone *zone, - struct scan_control *sc, - unsigned long *nr_anon, - unsigned long *nr_file, - struct list_head *isolated_list) +static noinline_for_stack void +update_isolated_counts(struct mem_cgroup_zone *mz, + struct list_head *page_list, + unsigned long *nr_anon, + unsigned long *nr_file) { - unsigned long nr_active; + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone *zone = mz->zone; unsigned int count[NR_LRU_LISTS] = { 0, }; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); + unsigned long nr_active = 0; + struct page *page; + int lru; + + /* + * Count pages and clear active flags + */ + list_for_each_entry(page, page_list, lru) { + int numpages = hpage_nr_pages(page); + lru = page_lru_base_type(page); + if (PageActive(page)) { + lru += LRU_ACTIVE; + ClearPageActive(page); + nr_active += numpages; + } + count[lru] += numpages; + } - nr_active = clear_active_flags(isolated_list, count); __count_vm_events(PGDEACTIVATE, nr_active); __mod_zone_page_state(zone, NR_ACTIVE_FILE, @@ -1401,8 +1448,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); reclaim_stat->recent_scanned[0] += *nr_anon; reclaim_stat->recent_scanned[1] += *nr_file; @@ -1454,8 +1499,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken, * of reclaimed pages */ static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, - struct scan_control *sc, int priority, int file) +shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, + struct scan_control *sc, int priority, int file) { LIST_HEAD(page_list); unsigned long nr_scanned; @@ -1466,6 +1511,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; + struct zone *zone = mz->zone; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1488,9 +1534,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, spin_lock_irq(&zone->lru_lock); - if (scanning_global_lru(sc)) { - nr_taken = isolate_pages_global(nr_to_scan, &page_list, - &nr_scanned, sc->order, reclaim_mode, zone, 0, file); + nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, + &nr_scanned, sc->order, + reclaim_mode, 0, file); + if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, @@ -1498,14 +1545,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, else __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); - } else { - nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list, - &nr_scanned, sc->order, reclaim_mode, zone, - sc->mem_cgroup, 0, file); - /* - * mem_cgroup_isolate_pages() keeps track of - * scanned pages on its own. - */ } if (nr_taken == 0) { @@ -1513,26 +1552,37 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, return 0; } - update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list); + update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); + + __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file); spin_unlock_irq(&zone->lru_lock); - nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority, + nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, &nr_dirty, &nr_writeback); /* Check if we should syncronously wait for writeback */ if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, zone, sc, + nr_reclaimed += shrink_page_list(&page_list, mz, sc, priority, &nr_dirty, &nr_writeback); } - local_irq_disable(); + spin_lock_irq(&zone->lru_lock); + if (current_is_kswapd()) __count_vm_events(KSWAPD_STEAL, nr_reclaimed); __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed); - putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list); + putback_inactive_pages(mz, &page_list); + + __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + + spin_unlock_irq(&zone->lru_lock); + + free_hot_cold_page_list(&page_list, 1); /* * If reclaim is isolating dirty pages under writeback, it implies @@ -1588,30 +1638,47 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, static void move_active_pages_to_lru(struct zone *zone, struct list_head *list, + struct list_head *pages_to_free, enum lru_list lru) { unsigned long pgmoved = 0; - struct pagevec pvec; struct page *page; - pagevec_init(&pvec, 1); + if (buffer_heads_over_limit) { + spin_unlock_irq(&zone->lru_lock); + list_for_each_entry(page, list, lru) { + if (page_has_private(page) && trylock_page(page)) { + if (page_has_private(page)) + try_to_release_page(page, 0); + unlock_page(page); + } + } + spin_lock_irq(&zone->lru_lock); + } while (!list_empty(list)) { + struct lruvec *lruvec; + page = lru_to_page(list); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - list_move(&page->lru, &zone->lru[lru].list); - mem_cgroup_add_lru_list(page, lru); + lruvec = mem_cgroup_lru_add_list(zone, page, lru); + list_move(&page->lru, &lruvec->lists[lru]); pgmoved += hpage_nr_pages(page); - if (!pagevec_add(&pvec, page) || list_empty(list)) { - spin_unlock_irq(&zone->lru_lock); - if (buffer_heads_over_limit) - pagevec_strip(&pvec); - __pagevec_release(&pvec); - spin_lock_irq(&zone->lru_lock); + if (put_page_testzero(page)) { + __ClearPageLRU(page); + __ClearPageActive(page); + del_page_from_lru_list(zone, page, lru); + + if (unlikely(PageCompound(page))) { + spin_unlock_irq(&zone->lru_lock); + (*get_compound_page_dtor(page))(page); + spin_lock_irq(&zone->lru_lock); + } else + list_add(&page->lru, pages_to_free); } } __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); @@ -1619,19 +1686,22 @@ static void move_active_pages_to_lru(struct zone *zone, __count_vm_events(PGDEACTIVATE, pgmoved); } -static void shrink_active_list(unsigned long nr_pages, struct zone *zone, - struct scan_control *sc, int priority, int file) +static void shrink_active_list(unsigned long nr_to_scan, + struct mem_cgroup_zone *mz, + struct scan_control *sc, + int priority, int file) { unsigned long nr_taken; - unsigned long pgscanned; + unsigned long nr_scanned; unsigned long vm_flags; LIST_HEAD(l_hold); /* The pages which were snipped off */ LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); unsigned long nr_rotated = 0; isolate_mode_t reclaim_mode = ISOLATE_ACTIVE; + struct zone *zone = mz->zone; lru_add_drain(); @@ -1641,26 +1711,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, reclaim_mode |= ISOLATE_CLEAN; spin_lock_irq(&zone->lru_lock); - if (scanning_global_lru(sc)) { - nr_taken = isolate_pages_global(nr_pages, &l_hold, - &pgscanned, sc->order, - reclaim_mode, zone, - 1, file); - zone->pages_scanned += pgscanned; - } else { - nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold, - &pgscanned, sc->order, - reclaim_mode, zone, - sc->mem_cgroup, 1, file); - /* - * mem_cgroup_isolate_pages() keeps track of - * scanned pages on its own. - */ - } + + nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, + &nr_scanned, sc->order, + reclaim_mode, 1, file); + if (global_reclaim(sc)) + zone->pages_scanned += nr_scanned; reclaim_stat->recent_scanned[file] += nr_taken; - __count_zone_vm_events(PGREFILL, zone, pgscanned); + __count_zone_vm_events(PGREFILL, zone, nr_scanned); if (file) __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); else @@ -1678,7 +1738,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, continue; } - if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { + if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { nr_rotated += hpage_nr_pages(page); /* * Identify referenced, file-backed active pages and @@ -1711,12 +1771,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, + move_active_pages_to_lru(zone, &l_active, &l_hold, LRU_ACTIVE + file * LRU_FILE); - move_active_pages_to_lru(zone, &l_inactive, + move_active_pages_to_lru(zone, &l_inactive, &l_hold, LRU_BASE + file * LRU_FILE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); + + free_hot_cold_page_list(&l_hold, 1); } #ifdef CONFIG_SWAP @@ -1741,10 +1803,8 @@ static int inactive_anon_is_low_global(struct zone *zone) * Returns true if the zone does not have enough inactive anon pages, * meaning some active anon pages need to be deactivated. */ -static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) +static int inactive_anon_is_low(struct mem_cgroup_zone *mz) { - int low; - /* * If we don't have swap space, anonymous page deactivation * is pointless. @@ -1752,15 +1812,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) if (!total_swap_pages) return 0; - if (scanning_global_lru(sc)) - low = inactive_anon_is_low_global(zone); - else - low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); - return low; + if (!scanning_global_lru(mz)) + return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, + mz->zone); + + return inactive_anon_is_low_global(mz->zone); } #else -static inline int inactive_anon_is_low(struct zone *zone, - struct scan_control *sc) +static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) { return 0; } @@ -1778,8 +1837,7 @@ static int inactive_file_is_low_global(struct zone *zone) /** * inactive_file_is_low - check if file pages need to be deactivated - * @zone: zone to check - * @sc: scan control of this context + * @mz: memory cgroup and zone to check * * When the system is doing streaming IO, memory pressure here * ensures that active file pages get deactivated, until more @@ -1791,45 +1849,44 @@ static int inactive_file_is_low_global(struct zone *zone) * This uses a different ratio than the anonymous pages, because * the page cache uses a use-once replacement algorithm. */ -static int inactive_file_is_low(struct zone *zone, struct scan_control *sc) +static int inactive_file_is_low(struct mem_cgroup_zone *mz) { - int low; + if (!scanning_global_lru(mz)) + return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, + mz->zone); - if (scanning_global_lru(sc)) - low = inactive_file_is_low_global(zone); - else - low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone); - return low; + return inactive_file_is_low_global(mz->zone); } -static int inactive_list_is_low(struct zone *zone, struct scan_control *sc, - int file) +static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) { if (file) - return inactive_file_is_low(zone, sc); + return inactive_file_is_low(mz); else - return inactive_anon_is_low(zone, sc); + return inactive_anon_is_low(mz); } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct zone *zone, struct scan_control *sc, int priority) + struct mem_cgroup_zone *mz, + struct scan_control *sc, int priority) { int file = is_file_lru(lru); if (is_active_lru(lru)) { - if (inactive_list_is_low(zone, sc, file)) - shrink_active_list(nr_to_scan, zone, sc, priority, file); + if (inactive_list_is_low(mz, file)) + shrink_active_list(nr_to_scan, mz, sc, priority, file); return 0; } - return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); + return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); } -static int vmscan_swappiness(struct scan_control *sc) +static int vmscan_swappiness(struct mem_cgroup_zone *mz, + struct scan_control *sc) { - if (scanning_global_lru(sc)) + if (global_reclaim(sc)) return vm_swappiness; - return mem_cgroup_swappiness(sc->mem_cgroup); + return mem_cgroup_swappiness(mz->mem_cgroup); } /* @@ -1840,15 +1897,15 @@ static int vmscan_swappiness(struct scan_control *sc) * * nr[0] = anon pages to scan; nr[1] = file pages to scan */ -static void get_scan_count(struct zone *zone, struct scan_control *sc, - unsigned long *nr, int priority) +static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, + unsigned long *nr, int priority) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); u64 fraction[2], denominator; - enum lru_list l; + enum lru_list lru; int noswap = 0; bool force_scan = false; @@ -1862,9 +1919,9 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, * latencies, so it's better to scan a minimum amount there as * well. */ - if (scanning_global_lru(sc) && current_is_kswapd()) + if (current_is_kswapd() && mz->zone->all_unreclaimable) force_scan = true; - if (!scanning_global_lru(sc)) + if (!global_reclaim(sc)) force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ @@ -1876,16 +1933,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); - if (scanning_global_lru(sc)) { - free = zone_page_state(zone, NR_FREE_PAGES); + if (global_reclaim(sc)) { + free = zone_page_state(mz->zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= high_wmark_pages(zone))) { + if (unlikely(file + free <= high_wmark_pages(mz->zone))) { fraction[0] = 1; fraction[1] = 0; denominator = 1; @@ -1897,8 +1954,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ - anon_prio = vmscan_swappiness(sc); - file_prio = 200 - vmscan_swappiness(sc); + anon_prio = vmscan_swappiness(mz, sc); + file_prio = 200 - vmscan_swappiness(mz, sc); /* * OK, so we have swap space and a fair amount of page cache @@ -1911,7 +1968,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, * * anon in [0], file in [1] */ - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&mz->zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -1932,24 +1989,24 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&mz->zone->lru_lock); fraction[0] = ap; fraction[1] = fp; denominator = ap + fp + 1; out: - for_each_evictable_lru(l) { - int file = is_file_lru(l); + for_each_evictable_lru(lru) { + int file = is_file_lru(lru); unsigned long scan; - scan = zone_nr_lru_pages(zone, sc, l); + scan = zone_nr_lru_pages(mz, lru); if (priority || noswap) { scan >>= priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); } - nr[l] = scan; + nr[lru] = scan; } } @@ -1960,7 +2017,7 @@ out: * back to the allocator and call try_to_compact_zone(), we ensure that * there are enough free pages for it to be likely successful */ -static inline bool should_continue_reclaim(struct zone *zone, +static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) @@ -2000,15 +2057,15 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(zone, sc->order)) { + switch (compaction_suitable(mz->zone, sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -2020,12 +2077,12 @@ static inline bool should_continue_reclaim(struct zone *zone, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_zone(int priority, struct zone *zone, - struct scan_control *sc) +static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, + struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; - enum lru_list l; + enum lru_list lru; unsigned long nr_reclaimed, nr_scanned; unsigned long nr_to_reclaim = sc->nr_to_reclaim; struct blk_plug plug; @@ -2033,19 +2090,19 @@ static void shrink_zone(int priority, struct zone *zone, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(zone, sc, nr, priority); + get_scan_count(mz, sc, nr, priority); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { - for_each_evictable_lru(l) { - if (nr[l]) { + for_each_evictable_lru(lru) { + if (nr[lru]) { nr_to_scan = min_t(unsigned long, - nr[l], SWAP_CLUSTER_MAX); - nr[l] -= nr_to_scan; + nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; - nr_reclaimed += shrink_list(l, nr_to_scan, - zone, sc, priority); + nr_reclaimed += shrink_list(lru, nr_to_scan, + mz, sc, priority); } } /* @@ -2066,17 +2123,89 @@ restart: * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_anon_is_low(zone, sc)) - shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); + if (inactive_anon_is_low(mz)) + shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(zone, nr_reclaimed, + if (should_continue_reclaim(mz, nr_reclaimed, sc->nr_scanned - nr_scanned, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); } +static void shrink_zone(int priority, struct zone *zone, + struct scan_control *sc) +{ + struct mem_cgroup *root = sc->target_mem_cgroup; + struct mem_cgroup_reclaim_cookie reclaim = { + .zone = zone, + .priority = priority, + }; + struct mem_cgroup *memcg; + + memcg = mem_cgroup_iter(root, NULL, &reclaim); + do { + struct mem_cgroup_zone mz = { + .mem_cgroup = memcg, + .zone = zone, + }; + + shrink_mem_cgroup_zone(priority, &mz, sc); + /* + * Limit reclaim has historically picked one memcg and + * scanned it with decreasing priority levels until + * nr_to_reclaim had been reclaimed. This priority + * cycle is thus over after a single memcg. + * + * Direct reclaim and kswapd, on the other hand, have + * to scan all memory cgroups to fulfill the overall + * scan target for the zone. + */ + if (!global_reclaim(sc)) { + mem_cgroup_iter_break(root, memcg); + break; + } + memcg = mem_cgroup_iter(root, memcg, &reclaim); + } while (memcg); +} + +/* Returns true if compaction should go ahead for a high-order request */ +static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) +{ + unsigned long balance_gap, watermark; + bool watermark_ok; + + /* Do not consider compaction for orders reclaim is meant to satisfy */ + if (sc->order <= PAGE_ALLOC_COSTLY_ORDER) + return false; + + /* + * Compaction takes time to run and there are potentially other + * callers using the pages just freed. Continue reclaiming until + * there is a buffer of free pages available to give compaction + * a reasonable chance of completing and allocating the page + */ + balance_gap = min(low_wmark_pages(zone), + (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / + KSWAPD_ZONE_BALANCE_GAP_RATIO); + watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); + watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); + + /* + * If compaction is deferred, reclaim up to a point where + * compaction will have a chance of success when re-enabled + */ + if (compaction_deferred(zone)) + return watermark_ok; + + /* If compaction is not ready to start, keep reclaiming */ + if (!compaction_suitable(zone, sc->order)) + return false; + + return watermark_ok; +} + /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -2094,8 +2223,9 @@ restart: * scan then give up on it. * * This function returns true if a zone is being reclaimed for a costly - * high-order allocation and compaction is either ready to begin or deferred. - * This indicates to the caller that it should retry the allocation or fail. + * high-order allocation and compaction is ready to begin. This indicates to + * the caller that it should consider retrying the allocation instead of + * further reclaim. */ static bool shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) @@ -2104,7 +2234,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, struct zone *zone; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; - bool should_abort_reclaim = false; + bool aborted_reclaim = false; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -2114,7 +2244,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, * Take care memory controller reclaiming has small influence * to global LRU. */ - if (scanning_global_lru(sc)) { + if (global_reclaim(sc)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; if (zone->all_unreclaimable && priority != DEF_PRIORITY) @@ -2129,10 +2259,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, * noticable problem, like transparent huge page * allocations. */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER && - (compaction_suitable(zone, sc->order) || - compaction_deferred(zone))) { - should_abort_reclaim = true; + if (compaction_ready(zone, sc)) { + aborted_reclaim = true; continue; } } @@ -2154,7 +2282,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, shrink_zone(priority, zone, sc); } - return should_abort_reclaim; + return aborted_reclaim; } static bool zone_reclaimable(struct zone *zone) @@ -2208,25 +2336,25 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; unsigned long writeback_threshold; + bool aborted_reclaim; get_mems_allowed(); delayacct_freepages_start(); - if (scanning_global_lru(sc)) + if (global_reclaim(sc)) count_vm_event(ALLOCSTALL); for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; if (!priority) - disable_swap_token(sc->mem_cgroup); - if (shrink_zones(priority, zonelist, sc)) - break; + disable_swap_token(sc->target_mem_cgroup); + aborted_reclaim = shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups */ - if (scanning_global_lru(sc)) { + if (global_reclaim(sc)) { unsigned long lru_pages = 0; for_each_zone_zonelist(zone, z, zonelist, gfp_zone(sc->gfp_mask)) { @@ -2287,8 +2415,12 @@ out: if (oom_killer_disabled) return 0; + /* Aborted reclaim to try compaction? don't OOM, then */ + if (aborted_reclaim) + return 1; + /* top priority shrink_zones still had more to do? don't OOM, then */ - if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) + if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) return 1; return 0; @@ -2305,7 +2437,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_unmap = 1, .may_swap = 1, .order = order, - .mem_cgroup = NULL, + .target_mem_cgroup = NULL, .nodemask = nodemask, }; struct shrink_control shrink = { @@ -2325,7 +2457,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR -unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, +unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned) @@ -2337,7 +2469,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, .may_unmap = 1, .may_swap = !noswap, .order = 0, - .mem_cgroup = mem, + .target_mem_cgroup = memcg, + }; + struct mem_cgroup_zone mz = { + .mem_cgroup = memcg, + .zone = zone, }; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2354,7 +2490,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_zone(0, zone, &sc); + shrink_mem_cgroup_zone(0, &mz, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2362,7 +2498,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, return sc.nr_reclaimed; } -unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap) { @@ -2375,7 +2511,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .may_swap = !noswap, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, - .mem_cgroup = mem_cont, + .target_mem_cgroup = memcg, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), @@ -2389,7 +2525,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, * take care of from where we get pages. So the node where we start the * scan does not need to be the current node. */ - nid = mem_cgroup_select_victim_node(mem_cont); + nid = mem_cgroup_select_victim_node(memcg); zonelist = NODE_DATA(nid)->node_zonelists; @@ -2405,6 +2541,29 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, } #endif +static void age_active_anon(struct zone *zone, struct scan_control *sc, + int priority) +{ + struct mem_cgroup *memcg; + + if (!total_swap_pages) + return; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + struct mem_cgroup_zone mz = { + .mem_cgroup = memcg, + .zone = zone, + }; + + if (inactive_anon_is_low(&mz)) + shrink_active_list(SWAP_CLUSTER_MAX, &mz, + sc, priority, 0); + + memcg = mem_cgroup_iter(NULL, memcg, NULL); + } while (memcg); +} + /* * pgdat_balanced is used when checking if a node is balanced for high-order * allocations. Only zones that meet watermarks and are in a zone allowed @@ -2525,7 +2684,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, */ .nr_to_reclaim = ULONG_MAX, .order = order, - .mem_cgroup = NULL, + .target_mem_cgroup = NULL, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -2564,9 +2723,7 @@ loop_again: * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - if (inactive_anon_is_low(zone, &sc)) - shrink_active_list(SWAP_CLUSTER_MAX, zone, - &sc, priority, 0); + age_active_anon(zone, &sc, priority); if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), 0, 0)) { @@ -3355,16 +3512,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) */ static void check_move_unevictable_page(struct page *page, struct zone *zone) { - VM_BUG_ON(PageActive(page)); + struct lruvec *lruvec; + VM_BUG_ON(PageActive(page)); retry: ClearPageUnevictable(page); if (page_evictable(page, NULL)) { enum lru_list l = page_lru_base_type(page); __dec_zone_state(zone, NR_UNEVICTABLE); - list_move(&page->lru, &zone->lru[l].list); - mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l); + lruvec = mem_cgroup_lru_move_lists(zone, page, + LRU_UNEVICTABLE, l); + list_move(&page->lru, &lruvec->lists[l]); __inc_zone_state(zone, NR_INACTIVE_ANON + l); __count_vm_event(UNEVICTABLE_PGRESCUED); } else { @@ -3372,8 +3531,9 @@ retry: * rotate unevictable list */ SetPageUnevictable(page); - list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); - mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE); + lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE, + LRU_UNEVICTABLE); + list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]); if (page_evictable(page, NULL)) goto retry; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 8fd603b1665..f600557a765 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -295,7 +295,7 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item) } EXPORT_SYMBOL(__dec_zone_page_state); -#ifdef CONFIG_CMPXCHG_LOCAL +#ifdef CONFIG_HAVE_CMPXCHG_LOCAL /* * If we have cmpxchg_local support then we do not need to incur the overhead * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cdcfcabb34a..ef92864ac62 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -156,17 +156,17 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto, void bt_sock_link(struct bt_sock_list *l, struct sock *sk) { - write_lock_bh(&l->lock); + write_lock(&l->lock); sk_add_node(sk, &l->head); - write_unlock_bh(&l->lock); + write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_link); void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { - write_lock_bh(&l->lock); + write_lock(&l->lock); sk_del_node_init(sk); - write_unlock_bh(&l->lock); + write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_unlink); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4221bd256bd..001307f8105 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -711,7 +711,14 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; - memcpy(hdev->extfeatures, rp->features, 8); + switch (rp->page) { + case 0: + memcpy(hdev->features, rp->features, 8); + break; + case 1: + memcpy(hdev->host_features, rp->features, 8); + break; + } hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); } @@ -1047,9 +1054,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, case LE_SCANNING_DISABLED: clear_bit(HCI_LE_SCAN, &hdev->dev_flags); - cancel_delayed_work_sync(&hdev->adv_work); - queue_delayed_work(hdev->workqueue, &hdev->adv_work, - jiffies + ADV_CLEAR_TIMEOUT); + schedule_delayed_work(&hdev->adv_work, ADV_CLEAR_TIMEOUT); break; default: @@ -2266,20 +2271,19 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_ev_num_comp_pkts *ev = (void *) skb->data; int i; - skb_pull(skb, sizeof(*ev)); - - BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); return; } - if (skb->len < ev->num_hndl * 4) { + if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) + + ev->num_hndl * sizeof(struct hci_comp_pkts_info)) { BT_DBG("%s bad parameters", hdev->name); return; } + BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + for (i = 0; i < ev->num_hndl; i++) { struct hci_comp_pkts_info *info = &ev->handles[i]; struct hci_conn *conn; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6d94616af31..0dcc9626677 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -767,7 +767,6 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, node, &hci_sk_list.head) { - local_bh_disable(); bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; @@ -778,7 +777,6 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, hci_dev_put(hdev); } bh_unlock_sock(sk); - local_bh_enable(); } read_unlock(&hci_sk_list.lock); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index aa78d8c4b93..faf0b11ac1d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -165,7 +165,7 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) { int err; - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); if (psm && __l2cap_global_chan_by_addr(psm, src)) { err = -EADDRINUSE; @@ -190,17 +190,17 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) } done: - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); return err; } int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) { - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); chan->scid = scid; - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); return 0; } @@ -289,9 +289,9 @@ struct l2cap_chan *l2cap_chan_create(struct sock *sk) chan->sk = sk; - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); @@ -306,9 +306,9 @@ struct l2cap_chan *l2cap_chan_create(struct sock *sk) void l2cap_chan_destroy(struct l2cap_chan *chan) { - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); list_del(&chan->global_l); - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); l2cap_chan_put(chan); } @@ -543,14 +543,14 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) * 200 - 254 are used by utilities like l2ping, etc. */ - spin_lock_bh(&conn->lock); + spin_lock(&conn->lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; - spin_unlock_bh(&conn->lock); + spin_unlock(&conn->lock); return id; } @@ -1190,7 +1190,7 @@ inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdad } /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, src); + bacpy(&bt_sk(sk)->dst, dst); chan->psm = psm; chan->dcid = cid; @@ -4702,7 +4702,7 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct l2cap_chan *c; - read_lock_bh(&chan_list_lock); + read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { struct sock *sk = c->sk; @@ -4715,7 +4715,7 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) c->sec_level, c->mode); } - read_unlock_bh(&chan_list_lock); + read_unlock(&chan_list_lock); return 0; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 9ca5616166f..c61d967012b 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -587,6 +587,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch if (smp_conn_security(conn, sec.level)) break; sk->sk_state = BT_CONFIG; + chan->state = BT_CONFIG; /* or for ACL link, under defer_setup time */ } else if (sk->sk_state == BT_CONNECT2 && @@ -731,6 +732,7 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { sk->sk_state = BT_CONFIG; + pi->chan->state = BT_CONFIG; __l2cap_connect_rsp_defer(pi->chan); release_sock(sk); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2540944d871..bc8e59dda78 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -291,7 +291,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (!(hdev->features[4] & LMP_NO_BREDR)) settings |= MGMT_SETTING_BREDR; - if (hdev->extfeatures[0] & LMP_HOST_LE) + if (hdev->host_features[0] & LMP_HOST_LE) settings |= MGMT_SETTING_LE; if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2756,7 +2756,7 @@ int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) if (!cmd) return -ENOENT; - err = cmd_status(cmd->sk, hdev->id, cmd->opcode, status); + err = cmd_status(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status)); mgmt_pending_remove(cmd); return err; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aea2bdd1510..f066678faee 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -370,7 +370,7 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr goto done; } - write_lock_bh(&rfcomm_sk_list.lock); + write_lock(&rfcomm_sk_list.lock); if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) { err = -EADDRINUSE; @@ -381,7 +381,7 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr sk->sk_state = BT_BOUND; } - write_unlock_bh(&rfcomm_sk_list.lock); + write_unlock(&rfcomm_sk_list.lock); done: release_sock(sk); @@ -455,7 +455,7 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) err = -EINVAL; - write_lock_bh(&rfcomm_sk_list.lock); + write_lock(&rfcomm_sk_list.lock); for (channel = 1; channel < 31; channel++) if (!__rfcomm_get_sock_by_addr(channel, src)) { @@ -464,7 +464,7 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) break; } - write_unlock_bh(&rfcomm_sk_list.lock); + write_unlock(&rfcomm_sk_list.lock); if (err < 0) goto done; @@ -982,7 +982,7 @@ static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) struct sock *sk; struct hlist_node *node; - read_lock_bh(&rfcomm_sk_list.lock); + read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { seq_printf(f, "%s %s %d %d\n", @@ -991,7 +991,7 @@ static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) sk->sk_state, rfcomm_pi(sk)->channel); } - read_unlock_bh(&rfcomm_sk_list.lock); + read_unlock(&rfcomm_sk_list.lock); return 0; } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index fa8f4de53b9..a2d4f5122a6 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -76,7 +76,7 @@ struct rfcomm_dev { }; static LIST_HEAD(rfcomm_dev_list); -static DEFINE_RWLOCK(rfcomm_dev_lock); +static DEFINE_SPINLOCK(rfcomm_dev_lock); static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb); static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err); @@ -146,7 +146,7 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - read_lock(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_get(id); @@ -157,7 +157,7 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) rfcomm_dev_hold(dev); } - read_unlock(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); return dev; } @@ -205,7 +205,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) if (!dev) return -ENOMEM; - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); if (req->dev_id < 0) { dev->id = 0; @@ -290,7 +290,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) __module_get(THIS_MODULE); out: - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); if (err < 0) goto free; @@ -327,9 +327,9 @@ static void rfcomm_dev_del(struct rfcomm_dev *dev) if (atomic_read(&dev->opened) > 0) return; - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); list_del_init(&dev->list); - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); rfcomm_dev_put(dev); } @@ -473,7 +473,7 @@ static int rfcomm_get_dev_list(void __user *arg) di = dl->dev_info; - read_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); list_for_each_entry(dev, &rfcomm_dev_list, list) { if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) @@ -488,7 +488,7 @@ static int rfcomm_get_dev_list(void __user *arg) break; } - read_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*di); @@ -766,9 +766,9 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) rfcomm_dlc_unlock(dev->dlc); if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) { - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); list_del_init(&dev->list); - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); rfcomm_dev_put(dev); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 5dc2f2126fa..8bf26d1bc5c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -482,7 +482,7 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le goto done; } - write_lock_bh(&sco_sk_list.lock); + write_lock(&sco_sk_list.lock); if (bacmp(src, BDADDR_ANY) && __sco_get_sock_by_addr(src)) { err = -EADDRINUSE; @@ -492,7 +492,7 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le sk->sk_state = BT_BOUND; } - write_unlock_bh(&sco_sk_list.lock); + write_unlock(&sco_sk_list.lock); done: release_sock(sk); @@ -965,14 +965,14 @@ static int sco_debugfs_show(struct seq_file *f, void *p) struct sock *sk; struct hlist_node *node; - read_lock_bh(&sco_sk_list.lock); + read_lock(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { seq_printf(f, "%s %s %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state); } - read_unlock_bh(&sco_sk_list.lock); + read_unlock(&sco_sk_list.lock); return 0; } diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 3a94eae7abe..b79747c4b64 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -510,10 +510,15 @@ int crush_do_rule(struct crush_map *map, switch (rule->steps[step].op) { case CRUSH_RULE_TAKE: w[0] = rule->steps[step].arg1; - if (force_pos >= 0) { - BUG_ON(force_context[force_pos] != w[0]); + + /* find position in force_context/hierarchy */ + while (force_pos >= 0 && + force_context[force_pos] != w[0]) force_pos--; - } + /* and move past it */ + if (force_pos >= 0) + force_pos--; + wsize = 1; break; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 85f3bc0a706..b780cb7947d 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -15,10 +15,9 @@ int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { memcpy(dst, src, sizeof(struct ceph_crypto_key)); - dst->key = kmalloc(src->len, GFP_NOFS); + dst->key = kmemdup(src->key, src->len, GFP_NOFS); if (!dst->key) return -ENOMEM; - memcpy(dst->key, src->key, src->len); return 0; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f4f3f58f523..5e254055c91 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -29,8 +29,8 @@ static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int __send_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); +static void __send_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); static int op_needs_trail(int op) { @@ -1022,8 +1022,8 @@ out: /* * caller should hold map_sem (for read) and request_mutex */ -static int __send_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) +static void __send_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) { struct ceph_osd_request_head *reqhead; @@ -1041,7 +1041,6 @@ static int __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ ceph_con_send(&req->r_osd->o_con, req->r_request); req->r_sent = req->r_osd->o_incarnation; - return 0; } /* @@ -1726,17 +1725,9 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); } else { - rc = __send_request(osdc, req); - if (rc) { - if (nofail) { - dout("osdc_start_request failed send, " - " will retry %lld\n", req->r_tid); - rc = 0; - } else { - __unregister_request(osdc, req); - } - } + __send_request(osdc, req); } + rc = 0; } out_unlock: diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index abf4393a77b..f3dbd4f596a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1177,9 +1177,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue, nonempty = 1; } - if (nonempty) - RCU_INIT_POINTER(dev->xps_maps, new_dev_maps); - else { + if (nonempty) { + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + } else { kfree(new_dev_maps); RCU_INIT_POINTER(dev->xps_maps, NULL); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 0d38808a230..556b0829866 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -765,7 +765,7 @@ int __netpoll_setup(struct netpoll *np) } /* last thing to do is link it to the net device structure */ - RCU_INIT_POINTER(ndev->npinfo, npinfo); + rcu_assign_pointer(ndev->npinfo, npinfo); return 0; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 8f162575337..028fc43aacb 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -49,13 +49,13 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); } static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 2ab16e12520..74d321a60e7 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -388,7 +388,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - RCU_INIT_POINTER(dn_db->ifa_list, ifa); + rcu_assign_pointer(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -1093,7 +1093,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - RCU_INIT_POINTER(dev->dn_ptr, dn_db); + rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 65f01dc4756..e41c40f48cf 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev) ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ - RCU_INIT_POINTER(dev->ip_ptr, in_dev); + rcu_assign_pointer(dev->ip_ptr, in_dev); out: return in_dev; out_kfree: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d04b13ae18f..2b555a5521e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -205,7 +205,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -/* Same as RCU_INIT_POINTER +/* Same as rcu_assign_pointer * but that macro() assumes that value is a pointer. */ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) @@ -529,7 +529,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * if (n) node_set_parent(n, tn); - RCU_INIT_POINTER(tn->child[i], n); + rcu_assign_pointer(tn->child[i], n); } #define MAX_WORK 10 @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) tp = node_parent((struct rt_trie_node *) tn); if (!tp) - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1027,7 +1027,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1164,7 +1164,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)tn); } else { - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; } } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5104bc0bbdb..450e5d21ed2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1249,7 +1249,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - RCU_INIT_POINTER(in_dev->mc_list, im); + rcu_assign_pointer(in_dev->mc_list, im); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1821,7 +1821,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - RCU_INIT_POINTER(inet->mc_list, iml); + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -2008,7 +2008,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } - RCU_INIT_POINTER(pmc->sflist, newpsl); + rcu_assign_pointer(pmc->sflist, newpsl); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2111,7 +2111,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - RCU_INIT_POINTER(pmc->sflist, newpsl); + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2240a8e8c44..fcf281819cd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,7 +71,7 @@ static inline void inet_diag_unlock_handler( } int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -193,7 +193,7 @@ nlmsg_failure: EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -202,7 +202,7 @@ static int inet_csk_diag_fill(struct sock *sk, } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -253,7 +253,7 @@ nlmsg_failure: } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) @@ -264,7 +264,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req *req) + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { int err; struct sock *sk; @@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; int err; @@ -540,7 +540,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -554,7 +554,7 @@ static int inet_csk_diag_dump(struct sock *sk, static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (bc != NULL) { @@ -639,7 +639,7 @@ nlmsg_failure: static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { struct inet_diag_entry entry; @@ -721,7 +721,7 @@ out: } void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc) + struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) { int i, num; int s_i, s_num; @@ -872,7 +872,7 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { const struct inet_diag_handler *handler; @@ -887,12 +887,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req); + int hdrlen = sizeof(struct inet_diag_req_v2); if (nlmsg_attrlen(cb->nlh, hdrlen)) bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc); + return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); } static inline int inet_diag_type2proto(int type) @@ -909,10 +909,10 @@ static inline int inet_diag_type2proto(int type) static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { - struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh); - struct inet_diag_req req; + struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); + struct inet_diag_req_v2 req; struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req_compat); + int hdrlen = sizeof(struct inet_diag_req); req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -929,8 +929,8 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c static int inet_diag_get_exact_compat(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { - struct inet_diag_req_compat *rc = NLMSG_DATA(nlh); - struct inet_diag_req req; + struct inet_diag_req *rc = NLMSG_DATA(nlh); + struct inet_diag_req_v2 req; req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); @@ -943,7 +943,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { - int hdrlen = sizeof(struct inet_diag_req_compat); + int hdrlen = sizeof(struct inet_diag_req); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) @@ -970,7 +970,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { - int hdrlen = sizeof(struct inet_diag_req); + int hdrlen = sizeof(struct inet_diag_req_v2); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -990,7 +990,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) inet_diag_dump, NULL, 0); } - return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h)); + return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); } static struct sock_diag_handler inet_diag_handler = { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 413ed1ba7a5..22a19931530 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - RCU_INIT_POINTER(*tp, t->next); + rcu_assign_pointer(*tp, t->next); break; } } @@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); - RCU_INIT_POINTER(*tp, t); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -792,7 +792,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return -ENOMEM; dev_hold(dev); - RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel); + rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8e54490ee3f..7bc2db6db8d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1225,7 +1225,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { - RCU_INIT_POINTER(mrt->mroute_sk, sk); + rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; } rtnl_unlock(); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 8cd357a8be7..ed3f2ad42e0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -35,13 +35,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 7fed04f875c..49978788a9d 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -108,7 +108,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) tcp = tcp_from_cgproto(cg_proto); percpu_counter_destroy(&tcp->tcp_sockets_allocated); - val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); if (val != RESOURCE_MAX) jump_label_dec(&memcg_socket_limit_enabled); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 69f8a7ca63d..e5e18cb8a58 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -19,7 +19,7 @@ #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req *req, + struct netlink_callback *cb, struct inet_diag_req_v2 *req, struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -30,7 +30,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req *req) + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { int err = -EINVAL; struct sock *sk; @@ -88,7 +88,7 @@ out_nosk: } static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; @@ -136,13 +136,13 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udp_table, skb, cb, r, bc); } static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return udp_dump_one(&udp_table, in_skb, nlh, req); } @@ -154,13 +154,13 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udplite_table, skb, cb, r, bc); } static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return udp_dump_one(&udplite_table, in_skb, nlh, req); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0ba0866230c..a225d5ee3c2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -429,7 +429,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->tstamp = jiffies; addrconf_sysctl_register(ndev); /* protected by rtnl_lock */ - RCU_INIT_POINTER(dev->ip6_ptr, ndev); + rcu_assign_pointer(dev->ip6_ptr, ndev); /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e1f7761815f..aa21da6a09c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -218,8 +218,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); - RCU_INIT_POINTER(t->next , rtnl_dereference(*tp)); - RCU_INIT_POINTER(*tp, t); + rcu_assign_pointer(t->next , rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); } /** @@ -237,7 +237,7 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - RCU_INIT_POINTER(*tp, t->next); + rcu_assign_pointer(*tp, t->next); break; } } @@ -1450,7 +1450,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - RCU_INIT_POINTER(ip6n->tnls_wc[0], t); + rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4894f4f194..d02f7e4dd61 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -131,7 +131,7 @@ static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { - RCU_INIT_POINTER(mh_filter, filter); + rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3b6dac956bb..133768e5291 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - RCU_INIT_POINTER(*tp, t->next); + rcu_assign_pointer(*tp, t->next); break; } } @@ -192,8 +192,8 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); - RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); - RCU_INIT_POINTER(*tp, t); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) @@ -393,7 +393,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) p->addr = a->addr; p->flags = a->flags; t->prl_count++; - RCU_INIT_POINTER(t->prl, p); + rcu_assign_pointer(t->prl, p); out: return err; } @@ -1177,7 +1177,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel); + rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 96debba2c40..1068f668ac4 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -332,7 +332,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_SUCCESS; /* activate it for RX */ - RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); if (timeout) mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 850bb96bd68..e60df48fa4d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -616,7 +616,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.dtim_period = new->dtim_period; - RCU_INIT_POINTER(sdata->u.ap.beacon, new); + rcu_assign_pointer(sdata->u.ap.beacon, new); synchronize_rcu(); @@ -1033,7 +1033,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EBUSY; } - RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta); + rcu_assign_pointer(vlansdata->u.vlan.sta, sta); } sta->sdata = vlansdata; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f8a32bf9821..b3d76b756cd 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -207,7 +207,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; /* U-APSD no in use */ } - RCU_INIT_POINTER(ifibss->presp, skb); + rcu_assign_pointer(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b197136aea2..3c428d4839c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -73,7 +73,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, if (!s) return -ENOENT; if (s == sta) { - RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -83,7 +83,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, s = rcu_dereference_protected(s->hnext, lockdep_is_held(&local->sta_mtx)); if (rcu_access_pointer(s->hnext)) { - RCU_INIT_POINTER(s->hnext, sta->hnext); + rcu_assign_pointer(s->hnext, sta->hnext); return 0; } @@ -226,7 +226,7 @@ static void sta_info_hash_add(struct ieee80211_local *local, { lockdep_assert_held(&local->sta_mtx); sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; - RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta); + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } static void sta_unblock(struct work_struct *wk) diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 93aab0715e8..422b79851ec 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -106,7 +106,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; - if (!(status->flag & RX_FLAG_IV_STRIPPED)) + if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) goto update_iv; return RX_CONTINUE; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e875f8902db..76613f5a55c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -777,7 +777,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (exp->helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - RCU_INIT_POINTER(help->helper, exp->helper); + rcu_assign_pointer(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index b62c4148b92..14af6329bdd 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -91,7 +91,7 @@ int nf_conntrack_register_notifier(struct net *net, ret = -EBUSY; goto out_unlock; } - RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new); + rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -128,7 +128,7 @@ int nf_ct_expect_register_notifier(struct net *net, ret = -EBUSY; goto out_unlock; } - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new); + rcu_assign_pointer(net->ct.nf_expect_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4605c947dcc..641ff5f9671 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) before updating alloc_size */ type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) + type->len; - RCU_INIT_POINTER(nf_ct_ext_types[type->id], type); + rcu_assign_pointer(nf_ct_ext_types[type->id], type); update_alloc_size(type); out: mutex_unlock(&nf_ct_ext_type_mutex); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c9e0de08aa8..299fec91f74 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -157,7 +157,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, memset(&help->help, 0, sizeof(help->help)); } - RCU_INIT_POINTER(help->helper, helper); + rcu_assign_pointer(help->helper, helper); out: return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e07dc3ae930..2a4834b8333 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1172,7 +1172,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return -EOPNOTSUPP; } - RCU_INIT_POINTER(help->helper, helper); + rcu_assign_pointer(help->helper, helper); return 0; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ce0c406f58a..957374a234d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) llog = rcu_dereference_protected(nf_loggers[pf], lockdep_is_held(&nf_log_mutex)); if (llog == NULL) - RCU_INIT_POINTER(nf_loggers[pf], logger); + rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) mutex_unlock(&nf_log_mutex); return -ENOENT; } - RCU_INIT_POINTER(nf_loggers[pf], logger); + rcu_assign_pointer(nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } @@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - RCU_INIT_POINTER(nf_loggers[tindex], logger); + rcu_assign_pointer(nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 99ffd288508..b3a7db678b8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) else if (old) ret = -EBUSY; else { - RCU_INIT_POINTER(queue_handler[pf], qh); + rcu_assign_pointer(queue_handler[pf], qh); ret = 0; } mutex_unlock(&queue_handler_mutex); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c879c1a2370..b4f8d849480 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) nfnl_unlock(); return -EBUSY; } - RCU_INIT_POINTER(subsys_table[n->subsys_id], n); + rcu_assign_pointer(subsys_table[n->subsys_id], n); nfnl_unlock(); return 0; @@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net) if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; - RCU_INIT_POINTER(net->nfnl, nfnl); + rcu_assign_pointer(net->nfnl, nfnl); return 0; } diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 38204112b9f..d8d42433755 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -282,7 +282,7 @@ int __init netlbl_domhsh_init(u32 size) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); spin_lock(&netlbl_domhsh_lock); - RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl); + rcu_assign_pointer(netlbl_domhsh, hsh_tbl); spin_unlock(&netlbl_domhsh_lock); return 0; @@ -330,7 +330,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&entry->list); - RCU_INIT_POINTER(netlbl_domhsh_def, entry); + rcu_assign_pointer(netlbl_domhsh_def, entry); } if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4b5fa0fe78f..e7ff694f104 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -354,7 +354,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; - RCU_INIT_POINTER(netlbl_unlhsh_def, iface); + rcu_assign_pointer(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); @@ -1447,11 +1447,9 @@ int __init netlbl_unlabel_init(u32 size) for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); - rcu_read_lock(); spin_lock(&netlbl_unlhsh_lock); - RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl); + rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); - rcu_read_unlock(); register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index bf10ea8fbbf..d65f699fbf3 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -480,7 +480,7 @@ int __init_or_module phonet_proto_register(unsigned int protocol, if (proto_tab[protocol]) err = -EBUSY; else - RCU_INIT_POINTER(proto_tab[protocol], pp); + rcu_assign_pointer(proto_tab[protocol], pp); mutex_unlock(&proto_tab_lock); return err; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c5827614376..9b9a85ecc4c 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -390,7 +390,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (routes->table[daddr] == NULL) { - RCU_INIT_POINTER(routes->table[daddr], dev); + rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 3f8d0b1603b..4c7eff30dfa 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -680,7 +680,7 @@ int pn_sock_bind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == NULL) { sock_hold(sk); - RCU_INIT_POINTER(pnres.sk[res], sk); + rcu_assign_pointer(pnres.sk[res], sk); ret = 0; } mutex_unlock(&resource_mutex); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 4e1de171866..a817705ce2d 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -477,17 +477,6 @@ void rds_iw_sync_mr(void *trans_private, int direction) } } -static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all) -{ - unsigned int item_count; - - item_count = atomic_read(&pool->item_count); - if (free_all) - return item_count; - - return 0; -} - /* * Flush our pool of MRs. * At a minimum, all currently unused MRs are unmapped. @@ -500,7 +489,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -514,8 +503,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) list_splice_init(&pool->clean_list, &kill_list); spin_unlock_irqrestore(&pool->list_lock, flags); - free_goal = rds_iw_flush_goal(pool, free_all); - /* Batched invalidate of dirty MRs. * For FMR based MRs, the mappings on the unmap list are * actually members of an ibmr (ibmr->mapping). They either diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 0a7964009e8..67494aef9ac 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -24,6 +24,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/flow_keys.h> +#include <net/red.h> /* Stochastic Fairness Queuing algorithm. @@ -108,24 +109,30 @@ struct sfq_slot { struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ short allot; /* credit for this slot */ + + unsigned int backlog; + struct red_vars vars; }; struct sfq_sched_data { /* frequently used fields */ int limit; /* limit of total number of packets in this qdisc */ unsigned int divisor; /* number of slots in hash table */ - unsigned int maxflows; /* number of flows in flows array */ - int headdrop; - int maxdepth; /* limit of packets per flow */ + u8 headdrop; + u8 maxdepth; /* limit of packets per flow */ u32 perturbation; - struct tcf_proto *filter_list; - sfq_index cur_depth; /* depth of longest slot */ + u8 cur_depth; /* depth of longest slot */ + u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ - struct sfq_slot *tail; /* current slot in round */ + struct tcf_proto *filter_list; sfq_index *ht; /* Hash table ('divisor' slots) */ struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ + struct red_parms *red_parms; + struct tc_sfqred_stats stats; + struct sfq_slot *tail; /* current slot in round */ + struct sfq_head dep[SFQ_MAX_DEPTH + 1]; /* Linked lists of slots, indexed by depth * dep[0] : list of unused flows @@ -133,6 +140,7 @@ struct sfq_sched_data { * dep[X] : list of flows with X packets */ + unsigned int maxflows; /* number of flows in flows array */ int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ struct timer_list perturb_timer; @@ -321,6 +329,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); + slot->backlog -= len; sfq_dec(q, x); kfree_skb(skb); sch->q.qlen--; @@ -341,6 +350,23 @@ drop: return 0; } +/* Is ECN parameter configured */ +static int sfq_prob_mark(const struct sfq_sched_data *q) +{ + return q->flags & TC_RED_ECN; +} + +/* Should packets over max threshold just be marked */ +static int sfq_hard_mark(const struct sfq_sched_data *q) +{ + return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN; +} + +static int sfq_headdrop(const struct sfq_sched_data *q) +{ + return q->headdrop; +} + static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { @@ -349,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) sfq_index x, qlen; struct sfq_slot *slot; int uninitialized_var(ret); + struct sk_buff *head; + int delta; hash = sfq_classify(skb, sch, &ret); if (hash == 0) { @@ -368,24 +396,75 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; + slot->backlog = 0; /* should already be 0 anyway... */ + red_set_vars(&slot->vars); + goto enqueue; } + if (q->red_parms) { + slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, + &slot->vars, + slot->backlog); + switch (red_action(q->red_parms, + &slot->vars, + slot->vars.qavg)) { + case RED_DONT_MARK: + break; - if (slot->qlen >= q->maxdepth) { - struct sk_buff *head; + case RED_PROB_MARK: + sch->qstats.overlimits++; + if (sfq_prob_mark(q)) { + /* We know we have at least one packet in queue */ + if (sfq_headdrop(q) && + INET_ECN_set_ce(slot->skblist_next)) { + q->stats.prob_mark_head++; + break; + } + if (INET_ECN_set_ce(skb)) { + q->stats.prob_mark++; + break; + } + } + q->stats.prob_drop++; + goto congestion_drop; + + case RED_HARD_MARK: + sch->qstats.overlimits++; + if (sfq_hard_mark(q)) { + /* We know we have at least one packet in queue */ + if (sfq_headdrop(q) && + INET_ECN_set_ce(slot->skblist_next)) { + q->stats.forced_mark_head++; + break; + } + if (INET_ECN_set_ce(skb)) { + q->stats.forced_mark++; + break; + } + } + q->stats.forced_drop++; + goto congestion_drop; + } + } - if (!q->headdrop) + if (slot->qlen >= q->maxdepth) { +congestion_drop: + if (!sfq_headdrop(q)) return qdisc_drop(skb, sch); + /* We know we have at least one packet in queue */ head = slot_dequeue_head(slot); - sch->qstats.backlog -= qdisc_pkt_len(head); + delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); + sch->qstats.backlog -= delta; + slot->backlog -= delta; qdisc_drop(head, sch); - sch->qstats.backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); return NET_XMIT_CN; } +enqueue: sch->qstats.backlog += qdisc_pkt_len(skb); + slot->backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ @@ -396,6 +475,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot->next = q->tail->next; q->tail->next = x; } + /* We could use a bigger initial quantum for new flows */ slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) @@ -439,7 +519,7 @@ next_slot: qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); - + slot->backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (slot->qlen == 0) { q->ht[slot->hash] = SFQ_EMPTY_SLOT; @@ -490,6 +570,8 @@ static void sfq_rehash(struct Qdisc *sch) sfq_dec(q, i); __skb_queue_tail(&list, skb); } + slot->backlog = 0; + red_set_vars(&slot->vars); q->ht[slot->hash] = SFQ_EMPTY_SLOT; } q->tail = NULL; @@ -514,6 +596,11 @@ drop: sch->qstats.backlog -= qdisc_pkt_len(skb); if (slot->qlen >= q->maxdepth) goto drop; slot_queue_add(slot, skb); + if (q->red_parms) + slot->vars.qavg = red_calc_qavg(q->red_parms, + &slot->vars, + slot->backlog); + slot->backlog += qdisc_pkt_len(skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ @@ -552,6 +639,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; unsigned int qlen; + struct red_parms *p = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; @@ -560,7 +648,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; - + if (ctl_v1 && ctl_v1->qth_min) { + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + } sch_tree_lock(sch); if (ctl->quantum) { q->quantum = ctl->quantum; @@ -576,6 +668,16 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl_v1) { if (ctl_v1->depth) q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + if (p) { + swap(q->red_parms, p); + red_set_parms(q->red_parms, + ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, + ctl_v1->Plog, ctl_v1->Scell_log, + NULL, + ctl_v1->max_P); + } + q->flags = ctl_v1->flags; q->headdrop = ctl_v1->headdrop; } if (ctl->limit) { @@ -594,6 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->perturbation = net_random(); } sch_tree_unlock(sch); + kfree(p); return 0; } @@ -625,6 +728,7 @@ static void sfq_destroy(struct Qdisc *sch) del_timer_sync(&q->perturb_timer); sfq_free(q->ht); sfq_free(q->slots); + kfree(q->red_parms); } static int sfq_init(struct Qdisc *sch, struct nlattr *opt) @@ -683,6 +787,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) struct sfq_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_sfq_qopt_v1 opt; + struct red_parms *p = q->red_parms; memset(&opt, 0, sizeof(opt)); opt.v0.quantum = q->quantum; @@ -693,6 +798,17 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.depth = q->maxdepth; opt.headdrop = q->headdrop; + if (p) { + opt.qth_min = p->qth_min >> p->Wlog; + opt.qth_max = p->qth_max >> p->Wlog; + opt.Wlog = p->Wlog; + opt.Plog = p->Plog; + opt.Scell_log = p->Scell_log; + opt.max_P = p->max_P; + } + memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); + opt.flags = q->flags; + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; @@ -747,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sfq_index idx = q->ht[cl - 1]; struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; - struct sk_buff *skb; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; qs.qlen = slot->qlen; - slot_queue_walk(slot, skb) - qs.backlog += qdisc_pkt_len(skb); + qs.backlog = slot->backlog; } if (gnet_stats_copy_queue(d, &qs) < 0) return -1; diff --git a/net/socket.c b/net/socket.c index e56162cd65b..28a96af484b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2492,7 +2492,7 @@ int sock_register(const struct net_proto_family *ops) lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - RCU_INIT_POINTER(net_families[ops->family], ops); + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 28d72d29873..affa631ac1a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -122,7 +122,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) return; gss_get_ctx(ctx); - RCU_INIT_POINTER(gss_cred->gc_ctx, ctx); + rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); smp_mb__before_clear_bit(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 03b56bc3b65..465df9ae104 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net) sunrpc_destroy_cache_detail(cd); return ret; } +EXPORT_SYMBOL_GPL(cache_register_net); int cache_register(struct cache_detail *cd) { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } +EXPORT_SYMBOL_GPL(cache_unregister_net); void cache_unregister(struct cache_detail *cd) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9d01d46b05f..e4aabc02368 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -285,9 +286,10 @@ svc_pool_map_put(void) mutex_lock(&svc_pool_map_mutex); if (!--m->count) { - m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } @@ -527,17 +529,20 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - - svc_close_all(&serv->sv_tempsocks); + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) @@ -683,8 +688,8 @@ found_pool: * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between - * all pools. Must be called with a svc_get() reference and - * the BKL or another lock to protect access to svc_serv fields. + * all pools. Caller must ensure that mutual exclusion between this and + * server startup or shutdown. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 38649cfa4e8..74cb0d8e9ca 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); +static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -147,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put); * Called by transport drivers to initialize the transport independent * portion of the transport instance. */ -void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, - struct svc_serv *serv) +void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, + struct svc_xprt *xprt, struct svc_serv *serv) { memset(xprt, 0, sizeof(*xprt)); xprt->xpt_class = xcl; @@ -163,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); - xprt->xpt_net = get_net(&init_net); + xprt->xpt_net = get_net(net); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt) /* * Remove a dead transport */ -void svc_delete_xprt(struct svc_xprt *xprt) +static void svc_delete_xprt(struct svc_xprt *xprt) { struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; @@ -893,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -928,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) +{ + struct svc_xprt *xprt; + + list_for_each_entry(xprt, xprt_list, xpt_list) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_BUSY, &xprt->xpt_flags); + } +} + +void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; + int i; + + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - set_bit(XPT_CLOSE, &xprt->xpt_flags); + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) svc_delete_xprt(xprt); - } + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 4653286fcc9..464570906f8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { int err, level, optname, one = 1; - svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, + &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; @@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, + &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); @@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &svsk->sk_xprt; - svc_xprt_init(&svc_tcp_bc_class, xprt, serv); + svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ba1296d88de..894cb42db91 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, if (!cma_xprt) return NULL; - svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); + svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3d3cf8931c..afeea32e04a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2250,6 +2250,7 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { }; static int parse_station_flags(struct genl_info *info, + enum nl80211_iftype iftype, struct station_parameters *params) { struct nlattr *flags[NL80211_STA_FLAG_MAX + 1]; @@ -2283,8 +2284,33 @@ static int parse_station_flags(struct genl_info *info, nla, sta_flags_policy)) return -EINVAL; - params->sta_flags_mask = (1 << __NL80211_STA_FLAG_AFTER_LAST) - 1; - params->sta_flags_mask &= ~1; + /* + * Only allow certain flags for interface types so that + * other attributes are silently ignored. Remember that + * this is backward compatibility code with old userspace + * and shouldn't be hit in other cases anyway. + */ + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + case NL80211_IFTYPE_MESH_POINT: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED); + default: + return -EINVAL; + } for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++) if (flags[flag]) @@ -2585,7 +2611,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->change_station) return -EOPNOTSUPP; - if (parse_station_flags(info, ¶ms)) + if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) @@ -2731,7 +2757,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->add_station) return -EOPNOTSUPP; - if (parse_station_flags(info, ¶ms)) + if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; switch (dev->ieee80211_ptr->iftype) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e0d747a2e80..637f11a1e4d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2927,7 +2927,7 @@ static int __net_init xfrm_user_net_init(struct net *net) if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ - RCU_INIT_POINTER(net->xfrm.nlsk, nlsk); + rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index e8c96957776..c0e14b3f230 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -823,6 +823,16 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id, } ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry); +/* Looks like: mcp:S */ +static int do_mcp_entry(const char *filename, struct mcp_device_id *id, + char *alias) +{ + sprintf(alias, MCP_MODULE_PREFIX "%s", id->name); + + return 1; +} +ADD_TO_DEVTABLE("mcp", struct mcp_device_id, do_mcp_entry); + static const struct dmifield { const char *prefix; int field; diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh new file mode 100755 index 00000000000..06a399ac8b2 --- /dev/null +++ b/tools/nfsd/inject_fault.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> +# +# Script for easier NFSD fault injection + +# Check that debugfs has been mounted +DEBUGFS=`cat /proc/mounts | grep debugfs` +if [ "$DEBUGFS" == "" ]; then + echo "debugfs does not appear to be mounted!" + echo "Please mount debugfs and try again" + exit 1 +fi + +# Check that the fault injection directory exists +DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd +if [ ! -d "$DEBUGDIR" ]; then + echo "$DEBUGDIR does not exist" + echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" + exit 1 +fi + +function help() +{ + echo "Usage $0 injection_type [count]" + echo "" + echo "Injection types are:" + ls $DEBUGDIR + exit 1 +} + +if [ $# == 0 ]; then + help +elif [ ! -f $DEBUGDIR/$1 ]; then + help +elif [ $# != 2 ]; then + COUNT=0 +else + COUNT=$2 +fi + +BEFORE=`mktemp` +AFTER=`mktemp` +dmesg > $BEFORE +echo $COUNT > $DEBUGDIR/$1 +dmesg > $AFTER +# Capture lines that only exist in the $AFTER file +diff $BEFORE $AFTER | grep ">" +rm -f $BEFORE $AFTER diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile new file mode 100644 index 00000000000..4ec84018cc1 --- /dev/null +++ b/tools/testing/selftests/Makefile @@ -0,0 +1,11 @@ +TARGETS = breakpoints + +all: + for TARGET in $(TARGETS); do \ + make -C $$TARGET; \ + done; + +clean: + for TARGET in $(TARGETS); do \ + make -C $$TARGET clean; \ + done; diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile new file mode 100644 index 00000000000..f362722cdce --- /dev/null +++ b/tools/testing/selftests/breakpoints/Makefile @@ -0,0 +1,20 @@ +# Taken from perf makefile +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := x86 +endif +ifeq ($(ARCH),x86_64) + ARCH := x86 +endif + + +all: +ifeq ($(ARCH),x86) + gcc breakpoint_test.c -o run_test +else + echo "Not an x86 target, can't build breakpoints selftests" +endif + +clean: + rm -fr run_test diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c new file mode 100644 index 00000000000..a0743f3b2b5 --- /dev/null +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for breakpoints (and more generally the do_debug() path) in x86. + */ + + +#include <sys/ptrace.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/user.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; + +/* + * Ensures the child and parent are always "talking" about + * the same test sequence. (ie: that we haven't forgotten + * to call check_trapped() somewhere). + */ +static int nr_tests; + +static void set_breakpoint_addr(void *addr, int n) +{ + int ret; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[n]), addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static void toggle_breakpoint(int n, int type, int len, + int local, int global, int set) +{ + int ret; + + int xtype, xlen; + unsigned long vdr7, dr7; + + switch (type) { + case BP_X: + xtype = 0; + break; + case BP_W: + xtype = 1; + break; + case BP_RW: + xtype = 3; + break; + } + + switch (len) { + case 1: + xlen = 0; + break; + case 2: + xlen = 4; + break; + case 4: + xlen = 0xc; + break; + case 8: + xlen = 8; + break; + } + + dr7 = ptrace(PTRACE_PEEKUSER, child_pid, + offsetof(struct user, u_debugreg[7]), 0); + + vdr7 = (xlen | xtype) << 16; + vdr7 <<= 4 * n; + + if (local) { + vdr7 |= 1 << (2 * n); + vdr7 |= 1 << 8; + } + if (global) { + vdr7 |= 2 << (2 * n); + vdr7 |= 1 << 9; + } + + if (set) + dr7 |= vdr7; + else + dr7 &= ~vdr7; + + ret = ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user, u_debugreg[7]), dr7); + if (ret) { + perror("Can't set dr7"); + exit(-1); + } +} + +/* Dummy variables to test read/write accesses */ +static unsigned long long dummy_var[4]; + +/* Dummy functions to test execution accesses */ +static void dummy_func(void) { } +static void dummy_func1(void) { } +static void dummy_func2(void) { } +static void dummy_func3(void) { } + +static void (*dummy_funcs[])(void) = { + dummy_func, + dummy_func1, + dummy_func2, + dummy_func3, +}; + +static int trapped; + +static void check_trapped(void) +{ + /* + * If we haven't trapped, wake up the parent + * so that it notices the failure. + */ + if (!trapped) + kill(getpid(), SIGUSR1); + trapped = 0; + + nr_tests++; +} + +static void write_var(int len) +{ + char *pcval; short *psval; int *pival; long long *plval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + pcval = (char *)&dummy_var[i]; + *pcval = 0xff; + break; + case 2: + psval = (short *)&dummy_var[i]; + *psval = 0xffff; + break; + case 4: + pival = (int *)&dummy_var[i]; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)&dummy_var[i]; + *plval = 0xffffffffffffffffLL; + break; + } + check_trapped(); + } +} + +static void read_var(int len) +{ + char cval; short sval; int ival; long long lval; + int i; + + for (i = 0; i < 4; i++) { + switch (len) { + case 1: + cval = *(char *)&dummy_var[i]; + break; + case 2: + sval = *(short *)&dummy_var[i]; + break; + case 4: + ival = *(int *)&dummy_var[i]; + break; + case 8: + lval = *(long long *)&dummy_var[i]; + break; + } + check_trapped(); + } +} + +/* + * Do the r/w/x accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, local, global, i; + char val; + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + + for (i = 0; i < 4; i++) { + dummy_funcs[i](); + check_trapped(); + } + } + } + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + write_var(len); + } + } + } + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + read_var(len); + } + } + } + + /* Icebp trap */ + asm(".byte 0xf1\n"); + check_trapped(); + + /* Int 3 trap */ + asm("int $3\n"); + check_trapped(); + + kill(getpid(), SIGUSR1); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int child_nr_tests; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WSTOPSIG(status) == SIGTRAP) { + child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid, + &nr_tests, 0); + if (child_nr_tests == nr_tests) + msg2 = "Ok"; + if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) { + perror("Can't poke\n"); + exit(-1); + } + } + + nr_tests++; + + printf("%s [%s]\n", msg, msg2); +} + +static void launch_instruction_breakpoints(char *buf, int local, int global) +{ + int i; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(dummy_funcs[i], i); + toggle_breakpoint(i, BP_X, 1, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test breakpoint %d with local: %d global: %d", + i, local, global); + check_success(buf); + toggle_breakpoint(i, BP_X, 1, local, global, 0); + } +} + +static void launch_watchpoints(char *buf, int mode, int len, + int local, int global) +{ + const char *mode_str; + int i; + + if (mode == BP_W) + mode_str = "write"; + else + mode_str = "read"; + + for (i = 0; i < 4; i++) { + set_breakpoint_addr(&dummy_var[i], i); + toggle_breakpoint(i, mode, len, local, global, 1); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint %d with len: %d local: " + "%d global: %d", mode_str, i, len, local, global); + check_success(buf); + toggle_breakpoint(i, mode, len, local, global, 0); + } +} + +/* Set the breakpoints and check the child successfully trigger them */ +static void launch_tests(void) +{ + char buf[1024]; + int len, local, global, i; + + /* Instruction breakpoints */ + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_instruction_breakpoints(buf, local, global); + } + } + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_W, len, + local, global); + } + } + } + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) { + for (local = 0; local < 2; local++) { + for (global = 0; global < 2; global++) { + if (!local && !global) + continue; + launch_watchpoints(buf, BP_RW, len, + local, global); + } + } + } + + /* Icebp traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test icebp"); + + /* Int 3 traps */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success("Test int 3 trap"); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); +} + +int main(int argc, char **argv) +{ + pid_t pid; + int ret; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + child_pid = pid; + + wait(NULL); + + launch_tests(); + + wait(NULL); + + return 0; +} diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests new file mode 100644 index 00000000000..320718a4e6b --- /dev/null +++ b/tools/testing/selftests/run_tests @@ -0,0 +1,8 @@ +#!/bin/bash + +TARGETS=breakpoints + +for TARGET in $TARGETS +do + $TARGET/run_test +done |