diff options
329 files changed, 5835 insertions, 4130 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 279da08f754..8df003963d9 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -53,6 +53,14 @@ Description: 512 bytes of data. +What: /sys/block/<disk>/integrity/device_is_integrity_capable +Date: July 2014 +Contact: Martin K. Petersen <martin.petersen@oracle.com> +Description: + Indicates whether a storage device is capable of storing + integrity metadata. Set if the device is T10 PI-capable. + + What: /sys/block/<disk>/integrity/write_generate Date: June 2008 Contact: Martin K. Petersen <martin.petersen@oracle.com> diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt index 2d735b0ae38..f56ec97f0d1 100644 --- a/Documentation/block/data-integrity.txt +++ b/Documentation/block/data-integrity.txt @@ -129,11 +129,11 @@ interface for this is being worked on. 4.1 BIO The data integrity patches add a new field to struct bio when -CONFIG_BLK_DEV_INTEGRITY is enabled. bio->bi_integrity is a pointer -to a struct bip which contains the bio integrity payload. Essentially -a bip is a trimmed down struct bio which holds a bio_vec containing -the integrity metadata and the required housekeeping information (bvec -pool, vector count, etc.) +CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a +pointer to a struct bip which contains the bio integrity payload. +Essentially a bip is a trimmed down struct bio which holds a bio_vec +containing the integrity metadata and the required housekeeping +information (bvec pool, vector count, etc.) A kernel subsystem can enable data integrity protection on a bio by calling bio_integrity_alloc(bio). This will allocate and attach the @@ -192,16 +192,6 @@ will require extra work due to the application tag. supported by the block device. - int bdev_integrity_enabled(block_device, int rw); - - bdev_integrity_enabled() will return 1 if the block device - supports integrity metadata transfer for the data direction - specified in 'rw'. - - bdev_integrity_enabled() honors the write_generate and - read_verify flags in sysfs and will respond accordingly. - - int bio_integrity_prep(bio); To generate IMD for WRITE and to set up buffers for READ, the @@ -216,36 +206,6 @@ will require extra work due to the application tag. bio_integrity_enabled() returned 1. - int bio_integrity_tag_size(bio); - - If the filesystem wants to use the application tag space it will - first have to find out how much storage space is available. - Because tag space is generally limited (usually 2 bytes per - sector regardless of sector size), the integrity framework - supports interleaving the information between the sectors in an - I/O. - - Filesystems can call bio_integrity_tag_size(bio) to find out how - many bytes of storage are available for that particular bio. - - Another option is bdev_get_tag_size(block_device) which will - return the number of available bytes per hardware sector. - - - int bio_integrity_set_tag(bio, void *tag_buf, len); - - After a successful return from bio_integrity_prep(), - bio_integrity_set_tag() can be used to attach an opaque tag - buffer to a bio. Obviously this only makes sense if the I/O is - a WRITE. - - - int bio_integrity_get_tag(bio, void *tag_buf, len); - - Similarly, at READ I/O completion time the filesystem can - retrieve the tag buffer using bio_integrity_get_tag(). - - 5.3 PASSING EXISTING INTEGRITY METADATA Filesystems that either generate their own integrity metadata or @@ -298,8 +258,6 @@ will require extra work due to the application tag. .name = "STANDARDSBODY-TYPE-VARIANT-CSUM", .generate_fn = my_generate_fn, .verify_fn = my_verify_fn, - .get_tag_fn = my_get_tag_fn, - .set_tag_fn = my_set_tag_fn, .tuple_size = sizeof(struct my_tuple_size), .tag_size = <tag bytes per hw sector>, }; @@ -321,7 +279,5 @@ will require extra work due to the application tag. are available per hardware sector. For DIF this is either 2 or 0 depending on the value of the Control Mode Page ATO bit. - See 6.2 for a description of get_tag_fn and set_tag_fn. - ---------------------------------------------------------------------- 2007-12-24 Martin K. Petersen <martin.petersen@oracle.com> diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt index c4728839d0c..6edc3b616e9 100644 --- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt +++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -36,6 +36,7 @@ Optional properties: - reg : should specify the address and size used for NFC command registers, NFC registers and NFC Sram. NFC Sram address and size can be absent if don't want to use it. + - clocks: phandle to the peripheral clock - Optional properties: - atmel,write-by-sram: boolean to enable NFC write by sram. @@ -98,6 +99,7 @@ nand0: nand@40000000 { compatible = "atmel,sama5d3-nfc"; #address-cells = <1>; #size-cells = <1>; + clocks = <&hsmc_clk> reg = < 0x70000000 0x10000000 /* NFC Command Registers */ 0xffffc000 0x00000070 /* NFC HSMC regs */ diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt index 61c5ec850f2..6b9f680cb57 100644 --- a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt +++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt @@ -4,8 +4,8 @@ Flash chips (Memory Technology Devices) are often used for solid state file systems on embedded devices. - compatible : should contain the specific model of mtd chip(s) - used, if known, followed by either "cfi-flash", "jedec-flash" - or "mtd-ram". + used, if known, followed by either "cfi-flash", "jedec-flash", + "mtd-ram" or "mtd-rom". - reg : Address range(s) of the mtd chip(s) It's possible to (optionally) define multiple "reg" tuples so that non-identical chips can be described in one node. diff --git a/Documentation/devicetree/bindings/net/sti-dwmac.txt b/Documentation/devicetree/bindings/net/sti-dwmac.txt index 3dd3d0bf112..6762a6b5da7 100644 --- a/Documentation/devicetree/bindings/net/sti-dwmac.txt +++ b/Documentation/devicetree/bindings/net/sti-dwmac.txt @@ -1,58 +1,65 @@ STMicroelectronics SoC DWMAC glue layer controller +This file documents differences between the core properties in +Documentation/devicetree/bindings/net/stmmac.txt +and what is needed on STi platforms to program the stmmac glue logic. + The device node has following properties. Required properties: - - compatible : Can be "st,stih415-dwmac", "st,stih416-dwmac" or - "st,stid127-dwmac". - - reg : Offset of the glue configuration register map in system + - compatible : Can be "st,stih415-dwmac", "st,stih416-dwmac", + "st,stih407-dwmac", "st,stid127-dwmac". + - reg : Offset of the glue configuration register map in system configuration regmap pointed by st,syscon property and size. - - - reg-names : Should be "sti-ethconf". - - - st,syscon : Should be phandle to system configuration node which + - st,syscon : Should be phandle to system configuration node which encompases this glue registers. + - st,gmac_en: this is to enable the gmac into a dedicated sysctl control + register available on STiH407 SoC. + - sti-ethconf: this is the gmac glue logic register to enable the GMAC, + select among the different modes and program the clk retiming. + - pinctrl-0: pin-control for all the MII mode supported. - - st,tx-retime-src: On STi Parts for Giga bit speeds, 125Mhz clocks can be - wired up in from different sources. One via TXCLK pin and other via CLK_125 - pin. This wiring is totally board dependent. However the retiming glue - logic should be configured accordingly. Possible values for this property - - "txclk" - if 125Mhz clock is wired up via txclk line. - "clk_125" - if 125Mhz clock is wired up via clk_125 line. - - This property is only valid for Giga bit setup( GMII, RGMII), and it is - un-used for non-giga bit (MII and RMII) setups. Also note that internal - clockgen can not generate stable 125Mhz clock. - - - st,ext-phyclk: This boolean property indicates who is generating the clock - for tx and rx. This property is only valid for RMII case where the clock can - be generated from the MAC or PHY. - - - clock-names: should be "sti-ethclk". - - clocks: Should point to ethernet clockgen which can generate phyclk. - +Optional properties: + - resets : phandle pointing to the system reset controller with correct + reset line index for ethernet reset. + - st,ext-phyclk: valid only for RMII where PHY can generate 50MHz clock or + MAC can generate it. + - st,tx-retime-src: This specifies which clk is wired up to the mac for + retimeing tx lines. This is totally board dependent and can take one of the + posssible values from "txclk", "clk_125" or "clkgen". + If not passed, the internal clock will be used by default. + - sti-ethclk: this is the phy clock. + - sti-clkconf: this is an extra sysconfig register, available in new SoCs, + to program the clk retiming. + - st,gmac_en: to enable the GMAC, this only is present in some SoCs; e.g. + STiH407. Example: -ethernet0: dwmac@fe810000 { - device_type = "network"; - compatible = "st,stih416-dwmac", "snps,dwmac", "snps,dwmac-3.710"; - reg = <0xfe810000 0x8000>, <0x8bc 0x4>; - reg-names = "stmmaceth", "sti-ethconf"; - interrupts = <0 133 0>, <0 134 0>, <0 135 0>; - interrupt-names = "macirq", "eth_wake_irq", "eth_lpi"; - phy-mode = "mii"; +ethernet0: dwmac@9630000 { + device_type = "network"; + status = "disabled"; + compatible = "st,stih407-dwmac", "snps,dwmac", "snps,dwmac-3.710"; + reg = <0x9630000 0x8000>, <0x80 0x4>; + reg-names = "stmmaceth", "sti-ethconf"; - st,syscon = <&syscfg_rear>; + st,syscon = <&syscfg_sbc_reg>; + st,gmac_en; + resets = <&softreset STIH407_ETH1_SOFTRESET>; + reset-names = "stmmaceth"; - snps,pbl = <32>; + interrupts = <GIC_SPI 98 IRQ_TYPE_NONE>, + <GIC_SPI 99 IRQ_TYPE_NONE>, + <GIC_SPI 100 IRQ_TYPE_NONE>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi"; + + snps,pbl = <32>; snps,mixed-burst; - resets = <&softreset STIH416_ETH0_SOFTRESET>; - reset-names = "stmmaceth"; - pinctrl-0 = <&pinctrl_mii0>; - pinctrl-names = "default"; - clocks = <&CLK_S_GMAC0_PHY>; - clock-names = "stmmaceth"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rgmii1>; + + clock-names = "stmmaceth", "sti-ethclk"; + clocks = <&CLK_S_C0_FLEXGEN CLK_EXT2F_A9>, + <&CLK_S_C0_FLEXGEN CLK_ETH_PHY>; }; diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 61947facfc0..553f10d0307 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -14,7 +14,6 @@ Table of contents - The Device-Mapper driver - The Software RAID / MD driver - Limitations when using the MD driver -- ChangeLog Overview @@ -450,270 +449,3 @@ number of sectors BEFORE attempting to use it. You have been warned! Even better is to simply use the Device-Mapper for linear raid and then you do not have this problem with odd numbers of sectors. - - -ChangeLog -========= - -2.1.30: - - Fix writev() (it kept writing the first segment over and over again - instead of moving onto subsequent segments). - - Fix crash in ntfs_mft_record_alloc() when mapping the new extent mft - record failed. -2.1.29: - - Fix a deadlock when mounting read-write. -2.1.28: - - Fix a deadlock. -2.1.27: - - Implement page migration support so the kernel can move memory used - by NTFS files and directories around for management purposes. - - Add support for writing to sparse files created with Windows XP SP2. - - Many minor improvements and bug fixes. -2.1.26: - - Implement support for sector sizes above 512 bytes (up to the maximum - supported by NTFS which is 4096 bytes). - - Enhance support for NTFS volumes which were supported by Windows but - not by Linux due to invalid attribute list attribute flags. - - A few minor updates and bug fixes. -2.1.25: - - Write support is now extended with write(2) being able to both - overwrite existing file data and to extend files. Also, if a write - to a sparse region occurs, write(2) will fill in the hole. Note, - mmap(2) based writes still do not support writing into holes or - writing beyond the initialized size. - - Write support has a new feature and that is that truncate(2) and - open(2) with O_TRUNC are now implemented thus files can be both made - smaller and larger. - - Note: Both write(2) and truncate(2)/open(2) with O_TRUNC still have - limitations in that they - - only provide limited support for highly fragmented files. - - only work on regular, i.e. uncompressed and unencrypted files. - - never create sparse files although this will change once directory - operations are implemented. - - Lots of bug fixes and enhancements across the board. -2.1.24: - - Support journals ($LogFile) which have been modified by chkdsk. This - means users can boot into Windows after we marked the volume dirty. - The Windows boot will run chkdsk and then reboot. The user can then - immediately boot into Linux rather than having to do a full Windows - boot first before rebooting into Linux and we will recognize such a - journal and empty it as it is clean by definition. - - Support journals ($LogFile) with only one restart page as well as - journals with two different restart pages. We sanity check both and - either use the only sane one or the more recent one of the two in the - case that both are valid. - - Lots of bug fixes and enhancements across the board. -2.1.23: - - Stamp the user space journal, aka transaction log, aka $UsnJrnl, if - it is present and active thus telling Windows and applications using - the transaction log that changes can have happened on the volume - which are not recorded in $UsnJrnl. - - Detect the case when Windows has been hibernated (suspended to disk) - and if this is the case do not allow (re)mounting read-write to - prevent data corruption when you boot back into the suspended - Windows session. - - Implement extension of resident files using the normal file write - code paths, i.e. most very small files can be extended to be a little - bit bigger but not by much. - - Add new mount option "disable_sparse". (See list of mount options - above for details.) - - Improve handling of ntfs volumes with errors and strange boot sectors - in particular. - - Fix various bugs including a nasty deadlock that appeared in recent - kernels (around 2.6.11-2.6.12 timeframe). -2.1.22: - - Improve handling of ntfs volumes with errors. - - Fix various bugs and race conditions. -2.1.21: - - Fix several race conditions and various other bugs. - - Many internal cleanups, code reorganization, optimizations, and mft - and index record writing code rewritten to fit in with the changes. - - Update Documentation/filesystems/ntfs.txt with instructions on how to - use the Device-Mapper driver with NTFS ftdisk/LDM raid. -2.1.20: - - Fix two stupid bugs introduced in 2.1.18 release. -2.1.19: - - Minor bugfix in handling of the default upcase table. - - Many internal cleanups and improvements. Many thanks to Linus - Torvalds and Al Viro for the help and advice with the sparse - annotations and cleanups. -2.1.18: - - Fix scheduling latencies at mount time. (Ingo Molnar) - - Fix endianness bug in a little traversed portion of the attribute - lookup code. -2.1.17: - - Fix bugs in mount time error code paths. -2.1.16: - - Implement access time updates (including mtime and ctime). - - Implement fsync(2), fdatasync(2), and msync(2) system calls. - - Enable the readv(2) and writev(2) system calls. - - Enable access via the asynchronous io (aio) API by adding support for - the aio_read(3) and aio_write(3) functions. -2.1.15: - - Invalidate quotas when (re)mounting read-write. - NOTE: This now only leave user space journalling on the side. (See - note for version 2.1.13, below.) -2.1.14: - - Fix an NFSd caused deadlock reported by several users. -2.1.13: - - Implement writing of inodes (access time updates are not implemented - yet so mounting with -o noatime,nodiratime is enforced). - - Enable writing out of resident files so you can now overwrite any - uncompressed, unencrypted, nonsparse file as long as you do not - change the file size. - - Add housekeeping of ntfs system files so that ntfsfix no longer needs - to be run after writing to an NTFS volume. - NOTE: This still leaves quota tracking and user space journalling on - the side but they should not cause data corruption. In the worst - case the charged quotas will be out of date ($Quota) and some - userspace applications might get confused due to the out of date - userspace journal ($UsnJrnl). -2.1.12: - - Fix the second fix to the decompression engine from the 2.1.9 release - and some further internals cleanups. -2.1.11: - - Driver internal cleanups. -2.1.10: - - Force read-only (re)mounting of volumes with unsupported volume - flags and various cleanups. -2.1.9: - - Fix two bugs in handling of corner cases in the decompression engine. -2.1.8: - - Read the $MFT mirror and compare it to the $MFT and if the two do not - match, force a read-only mount and do not allow read-write remounts. - - Read and parse the $LogFile journal and if it indicates that the - volume was not shutdown cleanly, force a read-only mount and do not - allow read-write remounts. If the $LogFile indicates a clean - shutdown and a read-write (re)mount is requested, empty $LogFile to - ensure that Windows cannot cause data corruption by replaying a stale - journal after Linux has written to the volume. - - Improve time handling so that the NTFS time is fully preserved when - converted to kernel time and only up to 99 nano-seconds are lost when - kernel time is converted to NTFS time. -2.1.7: - - Enable NFS exporting of mounted NTFS volumes. -2.1.6: - - Fix minor bug in handling of compressed directories that fixes the - erroneous "du" and "stat" output people reported. -2.1.5: - - Minor bug fix in attribute list attribute handling that fixes the - I/O errors on "ls" of certain fragmented files found by at least two - people running Windows XP. -2.1.4: - - Minor update allowing compilation with all gcc versions (well, the - ones the kernel can be compiled with anyway). -2.1.3: - - Major bug fixes for reading files and volumes in corner cases which - were being hit by Windows 2k/XP users. -2.1.2: - - Major bug fixes alleviating the hangs in statfs experienced by some - users. -2.1.1: - - Update handling of compressed files so people no longer get the - frequently reported warning messages about initialized_size != - data_size. -2.1.0: - - Add configuration option for developmental write support. - - Initial implementation of file overwriting. (Writes to resident files - are not written out to disk yet, so avoid writing to files smaller - than about 1kiB.) - - Intercept/abort changes in file size as they are not implemented yet. -2.0.25: - - Minor bugfixes in error code paths and small cleanups. -2.0.24: - - Small internal cleanups. - - Support for sendfile system call. (Christoph Hellwig) -2.0.23: - - Massive internal locking changes to mft record locking. Fixes - various race conditions and deadlocks. - - Fix ntfs over loopback for compressed files by adding an - optimization barrier. (gcc was screwing up otherwise ?) - Thanks go to Christoph Hellwig for pointing these two out: - - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). - - Fix ntfs_free() for ia64 and parisc. -2.0.22: - - Small internal cleanups. -2.0.21: - These only affect 32-bit architectures: - - Check for, and refuse to mount too large volumes (maximum is 2TiB). - - Check for, and refuse to open too large files and directories - (maximum is 16TiB). -2.0.20: - - Support non-resident directory index bitmaps. This means we now cope - with huge directories without problems. - - Fix a page leak that manifested itself in some cases when reading - directory contents. - - Internal cleanups. -2.0.19: - - Fix race condition and improvements in block i/o interface. - - Optimization when reading compressed files. -2.0.18: - - Fix race condition in reading of compressed files. -2.0.17: - - Cleanups and optimizations. -2.0.16: - - Fix stupid bug introduced in 2.0.15 in new attribute inode API. - - Big internal cleanup replacing the mftbmp access hacks by using the - new attribute inode API instead. -2.0.15: - - Bug fix in parsing of remount options. - - Internal changes implementing attribute (fake) inodes allowing all - attribute i/o to go via the page cache and to use all the normal - vfs/mm functionality. -2.0.14: - - Internal changes improving run list merging code and minor locking - change to not rely on BKL in ntfs_statfs(). -2.0.13: - - Internal changes towards using iget5_locked() in preparation for - fake inodes and small cleanups to ntfs_volume structure. -2.0.12: - - Internal cleanups in address space operations made possible by the - changes introduced in the previous release. -2.0.11: - - Internal updates and cleanups introducing the first step towards - fake inode based attribute i/o. -2.0.10: - - Microsoft says that the maximum number of inodes is 2^32 - 1. Update - the driver accordingly to only use 32-bits to store inode numbers on - 32-bit architectures. This improves the speed of the driver a little. -2.0.9: - - Change decompression engine to use a single buffer. This should not - affect performance except perhaps on the most heavy i/o on SMP - systems when accessing multiple compressed files from multiple - devices simultaneously. - - Minor updates and cleanups. -2.0.8: - - Remove now obsolete show_inodes and posix mount option(s). - - Restore show_sys_files mount option. - - Add new mount option case_sensitive, to determine if the driver - treats file names as case sensitive or not. - - Mostly drop support for short file names (for backwards compatibility - we only support accessing files via their short file name if one - exists). - - Fix dcache aliasing issues wrt short/long file names. - - Cleanups and minor fixes. -2.0.7: - - Just cleanups. -2.0.6: - - Major bugfix to make compatible with other kernel changes. This fixes - the hangs/oopses on umount. - - Locking cleanup in directory operations (remove BKL usage). -2.0.5: - - Major buffer overflow bug fix. - - Minor cleanups and updates for kernel 2.5.12. -2.0.4: - - Cleanups and updates for kernel 2.5.11. -2.0.3: - - Small bug fixes, cleanups, and performance improvements. -2.0.2: - - Use default fmask of 0177 so that files are no executable by default. - If you want owner executable files, just use fmask=0077. - - Update for kernel 2.5.9 but preserve backwards compatibility with - kernel 2.5.7. - - Minor bug fixes, cleanups, and updates. -2.0.1: - - Minor updates, primarily set the executable bit by default on files - so they can be executed. -2.0.0: - - Started ChangeLog. - diff --git a/MAINTAINERS b/MAINTAINERS index b0f17d59078..6c59c6697a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -564,7 +564,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Vince Bridgers <vbridgers2013@gmail.com> +M: Vince Bridgers <vbridger@opensource.altera.com> L: netdev@vger.kernel.org L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) S: Maintained @@ -3085,14 +3085,13 @@ S: Supported F: drivers/acpi/dock.c DOCUMENTATION -M: Jiri Kosina <jkosina@suse.cz> +M: Jonathan Corbet <corbet@lwn.net> L: linux-doc@vger.kernel.org S: Maintained F: Documentation/ X: Documentation/ABI/ X: Documentation/devicetree/ X: Documentation/[a-z][a-z]_[A-Z][A-Z]/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/doc.git DOUBLETALK DRIVER M: "James R. Van Zandt" <jrv@vanzandt.mv.com> @@ -5993,6 +5992,7 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git +T: git git://git.infradead.org/l2-mtd.git S: Maintained F: drivers/mtd/ F: include/linux/mtd/ diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index a4d52c42a43..5fa3755261c 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1440,6 +1440,8 @@ static int gpmc_probe_nand_child(struct platform_device *pdev, break; } + gpmc_nand_data->flash_bbt = of_get_nand_on_flash_bbt(child); + val = of_get_nand_bus_width(child); if (val == 16) gpmc_nand_data->devsize = NAND_BUSWIDTH_16; diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts index 8eb6d94c785..2e25de0800b 100644 --- a/arch/arm64/boot/dts/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -41,6 +41,10 @@ status = "ok"; }; +&sgenet0 { + status = "ok"; +}; + &xgenet { status = "ok"; }; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 87d3205e98d..295c72d52a1 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -176,6 +176,16 @@ clock-output-names = "menetclk"; }; + sge0clk: sge0clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + csr-mask = <0x3>; + clock-output-names = "sge0clk"; + }; + xge0clk: xge0clk@1f61c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -611,6 +621,20 @@ }; }; + sgenet0: ethernet@1f210000 { + compatible = "apm,xgene-enet"; + status = "disabled"; + reg = <0x0 0x1f210000 0x0 0x10000>, + <0x0 0x1f200000 0x0 0X10000>, + <0x0 0x1B000000 0x0 0X20000>; + reg-names = "enet_csr", "ring_csr", "ring_cmd"; + interrupts = <0x0 0xA0 0x4>; + dma-coherent; + clocks = <&sge0clk 0>; + local-mac-address = [00 00 00 00 00 00]; + phy-connection-type = "sgmii"; + }; + xgenet: ethernet@1f610000 { compatible = "apm,xgene-enet"; status = "disabled"; diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index b2667375928..1f0aa2024e9 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -39,6 +39,14 @@ 297: wr %o5, FPRS_FEF, %fprs; \ 298: +#define VISEntryHalfFast(fail_label) \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,pt %icc, 297f; \ + nop; \ + ba,a,pt %xcc, fail_label; \ +297: wr %o5, FPRS_FEF, %fprs; + #define VISExitHalf \ wr %o5, 0, %fprs; diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 9cf2ee01cee..140527a20e7 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -41,6 +41,10 @@ #endif #endif +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + #ifndef EX_LD #define EX_LD(x) x #endif @@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o3), %o0 .Llarge_src_unaligned: +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail) +#else + VISEntryHalf +#endif andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 - VISEntryHalf alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) @@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %icc, .Lmedium_unaligned +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail: + or %o0, %o1, %g2 +#endif .Lmedium: LOAD(prefetch, %o1 + 0x40, #n_reads_strong) andcc %g2, 0x7, %g0 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d9dcfa27aa8..0acac81f198 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -472,6 +472,7 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; + unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -4267,11 +4268,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) u32 low32, high32; unsigned long tmpl; struct desc_ptr dt; + unsigned long cr4; vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ - vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ + /* Save the most likely value for this task's CR4 in the VMCS. */ + cr4 = read_cr4(); + vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ + vmx->host_state.vmcs_host_cr4 = cr4; + vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 /* @@ -7514,7 +7520,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr; + unsigned long debugctlmsr, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) @@ -7540,6 +7546,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + cr4 = read_cr4(); + if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->host_state.vmcs_host_cr4 = cr4; + } + /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d56cd1f515b..3f627345d51 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -182,12 +182,17 @@ struct jit_context { bool seen_ld_abs; }; +/* maximum number of bytes emitted while JITing one eBPF insn */ +#define BPF_MAX_INSN_SIZE 128 +#define BPF_INSN_SAFETY 64 + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; int insn_cnt = bpf_prog->len; - u8 temp[64]; + bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); + u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; int i; int proglen = 0; u8 *prog = temp; @@ -225,7 +230,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(0x31, 0xc0); /* xor eax, eax */ EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ - if (ctx->seen_ld_abs) { + if (seen_ld_abs) { /* r9d : skb->len - skb->data_len (headlen) * r10 : skb->data */ @@ -685,7 +690,7 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]); - if (ctx->seen_ld_abs) { + if (seen_ld_abs) { EMIT2(0x41, 0x52); /* push %r10 */ EMIT2(0x41, 0x51); /* push %r9 */ /* need to adjust jmp offset, since @@ -699,7 +704,7 @@ xadd: if (is_imm8(insn->off)) return -EINVAL; } EMIT1_off32(0xE8, jmp_offset); - if (ctx->seen_ld_abs) { + if (seen_ld_abs) { EMIT2(0x41, 0x59); /* pop %r9 */ EMIT2(0x41, 0x5A); /* pop %r10 */ } @@ -804,7 +809,8 @@ emit_jmp: goto common_load; case BPF_LD | BPF_ABS | BPF_W: func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); -common_load: ctx->seen_ld_abs = true; +common_load: + ctx->seen_ld_abs = seen_ld_abs = true; jmp_offset = func - (image + addrs[i]); if (!func || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", @@ -878,6 +884,11 @@ common_load: ctx->seen_ld_abs = true; } ilen = prog - temp; + if (ilen > BPF_MAX_INSN_SIZE) { + pr_err("bpf_jit_compile fatal insn size error\n"); + return -EFAULT; + } + if (image) { if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpf_jit_compile fatal error\n"); @@ -934,9 +945,11 @@ void bpf_int_jit_compile(struct bpf_prog *prog) goto out; } if (image) { - if (proglen != oldproglen) + if (proglen != oldproglen) { pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", proglen, oldproglen); + goto out; + } break; } if (proglen == oldproglen) { diff --git a/block/Kconfig b/block/Kconfig index 2429515c05c..161491d0a87 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -77,6 +77,7 @@ config BLK_DEV_BSGLIB config BLK_DEV_INTEGRITY bool "Block layer data integrity support" + select CRC_T10DIF if BLK_DEV_INTEGRITY ---help--- Some storage devices allow extra information to be stored/retrieved to help protect the data. The block layer diff --git a/block/Makefile b/block/Makefile index a2ce6ac935e..00ecc97629d 100644 --- a/block/Makefile +++ b/block/Makefile @@ -20,6 +20,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o +obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o + diff --git a/block/bio-integrity.c b/block/bio-integrity.c index f14b4abbebd..0984232e429 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -79,6 +79,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; + bio->bi_rw |= REQ_INTEGRITY; return bip; err: @@ -96,11 +97,12 @@ EXPORT_SYMBOL(bio_integrity_alloc); */ void bio_integrity_free(struct bio *bio) { - struct bio_integrity_payload *bip = bio->bi_integrity; + struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; - if (bip->bip_owns_buf) - kfree(bip->bip_buf); + if (bip->bip_flags & BIP_BLOCK_INTEGRITY) + kfree(page_address(bip->bip_vec->bv_page) + + bip->bip_vec->bv_offset); if (bs) { if (bip->bip_slab != BIO_POOL_NONE) @@ -128,7 +130,7 @@ EXPORT_SYMBOL(bio_integrity_free); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - struct bio_integrity_payload *bip = bio->bi_integrity; + struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_vec *iv; if (bip->bip_vcnt >= bip->bip_max_vcnt) { @@ -147,24 +149,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_integrity_add_page); -static int bdev_integrity_enabled(struct block_device *bdev, int rw) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi == NULL) - return 0; - - if (rw == READ && bi->verify_fn != NULL && - (bi->flags & INTEGRITY_FLAG_READ)) - return 1; - - if (rw == WRITE && bi->generate_fn != NULL && - (bi->flags & INTEGRITY_FLAG_WRITE)) - return 1; - - return 0; -} - /** * bio_integrity_enabled - Check whether integrity can be passed * @bio: bio to check @@ -174,171 +158,86 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw) * set prior to calling. The functions honors the write_generate and * read_verify flags in sysfs. */ -int bio_integrity_enabled(struct bio *bio) +bool bio_integrity_enabled(struct bio *bio) { + struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + if (!bio_is_rw(bio)) - return 0; + return false; /* Already protected? */ if (bio_integrity(bio)) - return 0; + return false; + + if (bi == NULL) + return false; + + if (bio_data_dir(bio) == READ && bi->verify_fn != NULL && + (bi->flags & BLK_INTEGRITY_VERIFY)) + return true; + + if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL && + (bi->flags & BLK_INTEGRITY_GENERATE)) + return true; - return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); + return false; } EXPORT_SYMBOL(bio_integrity_enabled); /** - * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto + * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device - * @sectors: Number of 512 sectors to convert + * @sectors: Size of the bio in 512-byte sectors * * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the hardware - * sector size of the storage device. Convert the block layer sectors - * to physical sectors. + * sectors but integrity metadata is done in terms of the data integrity + * interval size of the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. */ -static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, - unsigned int sectors) +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) { - /* At this point there are only 512b or 4096b DIF/EPP devices */ - if (bi->sector_size == 4096) - return sectors >>= 3; - - return sectors; + return sectors >> (ilog2(bi->interval) - 9); } static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, unsigned int sectors) { - return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size; + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; } /** - * bio_integrity_tag_size - Retrieve integrity tag space - * @bio: bio to inspect - * - * Description: Returns the maximum number of tag bytes that can be - * attached to this bio. Filesystems can use this to determine how - * much metadata to attach to an I/O. - */ -unsigned int bio_integrity_tag_size(struct bio *bio) -{ - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - - BUG_ON(bio->bi_iter.bi_size == 0); - - return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size); -} -EXPORT_SYMBOL(bio_integrity_tag_size); - -static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, - int set) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - unsigned int nr_sectors; - - BUG_ON(bip->bip_buf == NULL); - - if (bi->tag_size == 0) - return -1; - - nr_sectors = bio_integrity_hw_sectors(bi, - DIV_ROUND_UP(len, bi->tag_size)); - - if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) { - printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__, - nr_sectors * bi->tuple_size, bip->bip_iter.bi_size); - return -1; - } - - if (set) - bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors); - else - bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors); - - return 0; -} - -/** - * bio_integrity_set_tag - Attach a tag buffer to a bio - * @bio: bio to attach buffer to - * @tag_buf: Pointer to a buffer containing tag data - * @len: Length of the included buffer - * - * Description: Use this function to tag a bio by leveraging the extra - * space provided by devices formatted with integrity protection. The - * size of the integrity buffer must be <= to the size reported by - * bio_integrity_tag_size(). - */ -int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len) -{ - BUG_ON(bio_data_dir(bio) != WRITE); - - return bio_integrity_tag(bio, tag_buf, len, 1); -} -EXPORT_SYMBOL(bio_integrity_set_tag); - -/** - * bio_integrity_get_tag - Retrieve a tag buffer from a bio - * @bio: bio to retrieve buffer from - * @tag_buf: Pointer to a buffer for the tag data - * @len: Length of the target buffer - * - * Description: Use this function to retrieve the tag buffer from a - * completed I/O. The size of the integrity buffer must be <= to the - * size reported by bio_integrity_tag_size(). - */ -int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) -{ - BUG_ON(bio_data_dir(bio) != READ); - - return bio_integrity_tag(bio, tag_buf, len, 0); -} -EXPORT_SYMBOL(bio_integrity_get_tag); - -/** - * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio + * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for - * @operate: operate number, 1 for generate, 0 for verify + * @proc_fn: Pointer to the relevant processing function */ -static int bio_integrity_generate_verify(struct bio *bio, int operate) +static int bio_integrity_process(struct bio *bio, + integrity_processing_fn *proc_fn) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - struct blk_integrity_exchg bix; + struct blk_integrity_iter iter; struct bio_vec *bv; - sector_t sector; - unsigned int sectors, ret = 0, i; - void *prot_buf = bio->bi_integrity->bip_buf; - - if (operate) - sector = bio->bi_iter.bi_sector; - else - sector = bio->bi_integrity->bip_iter.bi_sector; + struct bio_integrity_payload *bip = bio_integrity(bio); + unsigned int i, ret = 0; + void *prot_buf = page_address(bip->bip_vec->bv_page) + + bip->bip_vec->bv_offset; - bix.disk_name = bio->bi_bdev->bd_disk->disk_name; - bix.sector_size = bi->sector_size; + iter.disk_name = bio->bi_bdev->bd_disk->disk_name; + iter.interval = bi->interval; + iter.seed = bip_get_seed(bip); + iter.prot_buf = prot_buf; bio_for_each_segment_all(bv, bio, i) { void *kaddr = kmap_atomic(bv->bv_page); - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = bv->bv_len; - bix.prot_buf = prot_buf; - bix.sector = sector; - - if (operate) - bi->generate_fn(&bix); - else { - ret = bi->verify_fn(&bix); - if (ret) { - kunmap_atomic(kaddr); - return ret; - } - } - sectors = bv->bv_len / bi->sector_size; - sector += sectors; - prot_buf += sectors * bi->tuple_size; + iter.data_buf = kaddr + bv->bv_offset; + iter.data_size = bv->bv_len; + + ret = proc_fn(&iter); + if (ret) { + kunmap_atomic(kaddr); + return ret; + } kunmap_atomic(kaddr); } @@ -346,28 +245,6 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate) } /** - * bio_integrity_generate - Generate integrity metadata for a bio - * @bio: bio to generate integrity metadata for - * - * Description: Generates integrity metadata for a bio by calling the - * block device's generation callback function. The bio must have a - * bip attached with enough room to accommodate the generated - * integrity metadata. - */ -static void bio_integrity_generate(struct bio *bio) -{ - bio_integrity_generate_verify(bio, 1); -} - -static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) -{ - if (bi) - return bi->tuple_size; - - return 0; -} - -/** * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare * @@ -387,17 +264,17 @@ int bio_integrity_prep(struct bio *bio) unsigned long start, end; unsigned int len, nr_pages; unsigned int bytes, offset, i; - unsigned int sectors; + unsigned int intervals; bi = bdev_get_integrity(bio->bi_bdev); q = bdev_get_queue(bio->bi_bdev); BUG_ON(bi == NULL); BUG_ON(bio_integrity(bio)); - sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); + intervals = bio_integrity_intervals(bi, bio_sectors(bio)); /* Allocate kernel buffer for protection data */ - len = sectors * blk_integrity_tuple_size(bi); + len = intervals * bi->tuple_size; buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); @@ -416,10 +293,12 @@ int bio_integrity_prep(struct bio *bio) return -EIO; } - bip->bip_owns_buf = 1; - bip->bip_buf = buf; + bip->bip_flags |= BIP_BLOCK_INTEGRITY; bip->bip_iter.bi_size = len; - bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; + bip_set_seed(bip, bio->bi_iter.bi_sector); + + if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM) + bip->bip_flags |= BIP_IP_CHECKSUM; /* Map it */ offset = offset_in_page(buf); @@ -455,26 +334,13 @@ int bio_integrity_prep(struct bio *bio) /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE) - bio_integrity_generate(bio); + bio_integrity_process(bio, bi->generate_fn); return 0; } EXPORT_SYMBOL(bio_integrity_prep); /** - * bio_integrity_verify - Verify integrity metadata for a bio - * @bio: bio to verify - * - * Description: This function is called to verify the integrity of a - * bio. The data in the bio io_vec is compared to the integrity - * metadata returned by the HBA. - */ -static int bio_integrity_verify(struct bio *bio) -{ - return bio_integrity_generate_verify(bio, 0); -} - -/** * bio_integrity_verify_fn - Integrity I/O completion worker * @work: Work struct stored in bio to be verified * @@ -487,9 +353,10 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; + struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); int error; - error = bio_integrity_verify(bio); + error = bio_integrity_process(bio, bi->verify_fn); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; @@ -510,7 +377,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) */ void bio_integrity_endio(struct bio *bio, int error) { - struct bio_integrity_payload *bip = bio->bi_integrity; + struct bio_integrity_payload *bip = bio_integrity(bio); BUG_ON(bip->bip_bio != bio); @@ -541,7 +408,7 @@ EXPORT_SYMBOL(bio_integrity_endio); */ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { - struct bio_integrity_payload *bip = bio->bi_integrity; + struct bio_integrity_payload *bip = bio_integrity(bio); struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); @@ -563,7 +430,7 @@ EXPORT_SYMBOL(bio_integrity_advance); void bio_integrity_trim(struct bio *bio, unsigned int offset, unsigned int sectors) { - struct bio_integrity_payload *bip = bio->bi_integrity; + struct bio_integrity_payload *bip = bio_integrity(bio); struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); bio_integrity_advance(bio, offset << 9); @@ -582,7 +449,7 @@ EXPORT_SYMBOL(bio_integrity_trim); int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { - struct bio_integrity_payload *bip_src = bio_src->bi_integrity; + struct bio_integrity_payload *bip_src = bio_integrity(bio_src); struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); @@ -646,6 +513,4 @@ void __init bio_integrity_init(void) sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * BIP_INLINE_VECS, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - if (!bip_slab) - panic("Failed to create slab\n"); } diff --git a/block/bio.c b/block/bio.c index 3e6331d25d9..3e6e1986a5b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -428,6 +428,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) front_pad = 0; inline_vecs = nr_iovecs; } else { + /* should not use nobvec bioset for nr_iovecs > 0 */ + if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0)) + return NULL; /* * generic_make_request() converts recursion to iteration; this * means if we're running beneath it, any bios we allocate and @@ -1900,20 +1903,9 @@ void bioset_free(struct bio_set *bs) } EXPORT_SYMBOL(bioset_free); -/** - * bioset_create - Create a bio_set - * @pool_size: Number of bio and bio_vecs to cache in the mempool - * @front_pad: Number of bytes to allocate in front of the returned bio - * - * Description: - * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller - * to ask for a number of bytes to be allocated in front of the bio. - * Front pad allocation is useful for embedding the bio inside - * another structure, to avoid allocating extra data to go with the bio. - * Note that the bio must be embedded at the END of that structure always, - * or things will break badly. - */ -struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) +static struct bio_set *__bioset_create(unsigned int pool_size, + unsigned int front_pad, + bool create_bvec_pool) { unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); struct bio_set *bs; @@ -1938,9 +1930,11 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - bs->bvec_pool = biovec_create_pool(pool_size); - if (!bs->bvec_pool) - goto bad; + if (create_bvec_pool) { + bs->bvec_pool = biovec_create_pool(pool_size); + if (!bs->bvec_pool) + goto bad; + } bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); if (!bs->rescue_workqueue) @@ -1951,8 +1945,41 @@ bad: bioset_free(bs); return NULL; } + +/** + * bioset_create - Create a bio_set + * @pool_size: Number of bio and bio_vecs to cache in the mempool + * @front_pad: Number of bytes to allocate in front of the returned bio + * + * Description: + * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller + * to ask for a number of bytes to be allocated in front of the bio. + * Front pad allocation is useful for embedding the bio inside + * another structure, to avoid allocating extra data to go with the bio. + * Note that the bio must be embedded at the END of that structure always, + * or things will break badly. + */ +struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) +{ + return __bioset_create(pool_size, front_pad, true); +} EXPORT_SYMBOL(bioset_create); +/** + * bioset_create_nobvec - Create a bio_set without bio_vec mempool + * @pool_size: Number of bio to cache in the mempool + * @front_pad: Number of bytes to allocate in front of the returned bio + * + * Description: + * Same functionality as bioset_create() except that mempool is not + * created for bio_vecs. Saving some memory for bio_clone_fast() users. + */ +struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad) +{ + return __bioset_create(pool_size, front_pad, false); +} +EXPORT_SYMBOL(bioset_create_nobvec); + #ifdef CONFIG_BLK_CGROUP /** * bio_associate_current - associate a bio with %current diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e17da947f6b..0ac817b750d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -822,7 +822,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) static struct cgroup_subsys_state * blkcg_css_alloc(struct cgroup_subsys_state *parent_css) { - static atomic64_t id_seq = ATOMIC64_INIT(0); struct blkcg *blkcg; if (!parent_css) { @@ -836,7 +835,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; - blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index d3fd7aa3d2a..c567865b5f1 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -50,9 +50,6 @@ struct blkcg { struct blkcg_gq *blkg_hint; struct hlist_head blkg_list; - /* for policies to test whether associated blkcg has changed */ - uint64_t id; - /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ unsigned int cfq_leaf_weight; diff --git a/block/blk-core.c b/block/blk-core.c index 9c888bd22b0..0421b53e643 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info - * - * Will return NULL if the request queue cannot be located. + * backing_dev_info. This function can only be called if @bdev is opened + * and the return value is never NULL. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct backing_dev_info *ret = NULL; struct request_queue *q = bdev_get_queue(bdev); - if (q) - ret = &q->backing_dev_info; - return ret; + return &q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); @@ -394,11 +390,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) * be drained. Check all the queues and counters. */ if (drain_all) { + struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); drain |= !list_empty(&q->queue_head); for (i = 0; i < 2; i++) { drain |= q->nr_rqs[i]; drain |= q->in_flight[i]; - drain |= !list_empty(&q->flush_queue[i]); + if (fq) + drain |= !list_empty(&fq->flush_queue[i]); } } @@ -604,9 +602,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) #ifdef CONFIG_BLK_CGROUP INIT_LIST_HEAD(&q->blkg_list); #endif - INIT_LIST_HEAD(&q->flush_queue[0]); - INIT_LIST_HEAD(&q->flush_queue[1]); - INIT_LIST_HEAD(&q->flush_data_in_flight); INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -709,8 +704,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (!q) return NULL; - q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); - if (!q->flush_rq) + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0); + if (!q->fq) return NULL; if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) @@ -746,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, return q; fail: - kfree(q->flush_rq); + blk_free_flush_queue(q->fq); return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -934,8 +929,8 @@ static struct io_context *rq_ioc(struct bio *bio) * pressure or if @q is dead. * * Must be called with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. + * Returns ERR_PTR on failure, with @q->queue_lock held. + * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *__get_request(struct request_list *rl, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -949,7 +944,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, int may_queue; if (unlikely(blk_queue_dying(q))) - return NULL; + return ERR_PTR(-ENODEV); may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) @@ -974,7 +969,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * process is not a "batcher", and not * exempted by the IO scheduler */ - return NULL; + return ERR_PTR(-ENOMEM); } } } @@ -992,7 +987,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * allocated with any setting of ->nr_requests */ if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) - return NULL; + return ERR_PTR(-ENOMEM); q->nr_rqs[is_sync]++; rl->count[is_sync]++; @@ -1065,8 +1060,8 @@ fail_elvpriv: * shouldn't stall IO. Treat this request as !elvpriv. This will * disturb iosched and blkcg but weird is bettern than dead. */ - printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n", - dev_name(q->backing_dev_info.dev)); + printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", + __func__, dev_name(q->backing_dev_info.dev)); rq->cmd_flags &= ~REQ_ELVPRIV; rq->elv.icq = NULL; @@ -1097,7 +1092,7 @@ fail_alloc: rq_starved: if (unlikely(rl->count[is_sync] == 0)) rl->starved[is_sync] = 1; - return NULL; + return ERR_PTR(-ENOMEM); } /** @@ -1111,8 +1106,8 @@ rq_starved: * function keeps retrying under memory pressure and fails iff @q is dead. * * Must be called with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. + * Returns ERR_PTR on failure, with @q->queue_lock held. + * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -1125,12 +1120,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags, rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: rq = __get_request(rl, rw_flags, bio, gfp_mask); - if (rq) + if (!IS_ERR(rq)) return rq; if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); - return NULL; + return rq; } /* wait on @rl and retry */ @@ -1167,7 +1162,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, spin_lock_irq(q->queue_lock); rq = get_request(q, rw, NULL, gfp_mask); - if (!rq) + if (IS_ERR(rq)) spin_unlock_irq(q->queue_lock); /* q->queue_lock is unlocked at this point */ @@ -1219,8 +1214,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, { struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - if (unlikely(!rq)) - return ERR_PTR(-ENOMEM); + if (IS_ERR(rq)) + return rq; blk_rq_set_block_pc(rq); @@ -1614,8 +1609,8 @@ get_rq: * Returns with the queue unlocked. */ req = get_request(q, rw_flags, bio, GFP_NOIO); - if (unlikely(!req)) { - bio_endio(bio, -ENODEV); /* @q is dead */ + if (IS_ERR(req)) { + bio_endio(bio, PTR_ERR(req)); /* @q is dead */ goto out_unlock; } @@ -2405,11 +2400,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) { int total_bytes; + trace_block_rq_complete(req->q, req, nr_bytes); + if (!req->bio) return false; - trace_block_rq_complete(req->q, req, nr_bytes); - /* * For fs requests, rq is just carrier of independent bio's * and each partial completion should be handled separately. @@ -2449,8 +2444,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) error_type = "I/O"; break; } - printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", - error_type, req->rq_disk ? + printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", + __func__, error_type, req->rq_disk ? req->rq_disk->disk_name : "?", (unsigned long long)blk_rq_pos(req)); @@ -2931,7 +2926,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, blk_rq_init(NULL, rq); __rq_for_each_bio(bio_src, rq_src) { - bio = bio_clone_bioset(bio_src, gfp_mask, bs); + bio = bio_clone_fast(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; diff --git a/block/blk-flush.c b/block/blk-flush.c index 3cb5e9e7108..20badd7b9d1 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -28,7 +28,7 @@ * * The actual execution of flush is double buffered. Whenever a request * needs to execute PRE or POSTFLUSH, it queues at - * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a + * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a * flush is issued and the pending_idx is toggled. When the flush * completes, all the requests which were pending are proceeded to the next * step. This allows arbitrary merging of different types of FLUSH/FUA @@ -91,7 +91,8 @@ enum { FLUSH_PENDING_TIMEOUT = 5 * HZ, }; -static bool blk_kick_flush(struct request_queue *q); +static bool blk_kick_flush(struct request_queue *q, + struct blk_flush_queue *fq); static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { @@ -126,8 +127,6 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; rq->end_io = rq->flush.saved_end_io; - - blk_clear_rq_complete(rq); } static bool blk_flush_queue_rq(struct request *rq, bool add_front) @@ -150,6 +149,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front) /** * blk_flush_complete_seq - complete flush sequence * @rq: FLUSH/FUA request being sequenced + * @fq: flush queue * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) * @error: whether an error occurred * @@ -157,16 +157,17 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front) * completion and trigger the next step. * * CONTEXT: - * spin_lock_irq(q->queue_lock or q->mq_flush_lock) + * spin_lock_irq(q->queue_lock or fq->mq_flush_lock) * * RETURNS: * %true if requests were added to the dispatch queue, %false otherwise. */ -static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, - int error) +static bool blk_flush_complete_seq(struct request *rq, + struct blk_flush_queue *fq, + unsigned int seq, int error) { struct request_queue *q = rq->q; - struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; + struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; bool queued = false, kicked; BUG_ON(rq->flush.seq & seq); @@ -182,12 +183,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, case REQ_FSEQ_POSTFLUSH: /* queue for flush */ if (list_empty(pending)) - q->flush_pending_since = jiffies; + fq->flush_pending_since = jiffies; list_move_tail(&rq->flush.list, pending); break; case REQ_FSEQ_DATA: - list_move_tail(&rq->flush.list, &q->flush_data_in_flight); + list_move_tail(&rq->flush.list, &fq->flush_data_in_flight); queued = blk_flush_queue_rq(rq, true); break; @@ -202,7 +203,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, list_del_init(&rq->flush.list); blk_flush_restore_request(rq); if (q->mq_ops) - blk_mq_end_io(rq, error); + blk_mq_end_request(rq, error); else __blk_end_request_all(rq, error); break; @@ -211,7 +212,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, BUG(); } - kicked = blk_kick_flush(q); + kicked = blk_kick_flush(q, fq); return kicked | queued; } @@ -222,17 +223,18 @@ static void flush_end_io(struct request *flush_rq, int error) bool queued = false; struct request *rq, *n; unsigned long flags = 0; + struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx); if (q->mq_ops) { - spin_lock_irqsave(&q->mq_flush_lock, flags); - q->flush_rq->tag = -1; + spin_lock_irqsave(&fq->mq_flush_lock, flags); + flush_rq->tag = -1; } - running = &q->flush_queue[q->flush_running_idx]; - BUG_ON(q->flush_pending_idx == q->flush_running_idx); + running = &fq->flush_queue[fq->flush_running_idx]; + BUG_ON(fq->flush_pending_idx == fq->flush_running_idx); /* account completion of the flush request */ - q->flush_running_idx ^= 1; + fq->flush_running_idx ^= 1; if (!q->mq_ops) elv_completed_request(q, flush_rq); @@ -242,7 +244,7 @@ static void flush_end_io(struct request *flush_rq, int error) unsigned int seq = blk_flush_cur_seq(rq); BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); - queued |= blk_flush_complete_seq(rq, seq, error); + queued |= blk_flush_complete_seq(rq, fq, seq, error); } /* @@ -256,71 +258,81 @@ static void flush_end_io(struct request *flush_rq, int error) * directly into request_fn may confuse the driver. Always use * kblockd. */ - if (queued || q->flush_queue_delayed) { + if (queued || fq->flush_queue_delayed) { WARN_ON(q->mq_ops); blk_run_queue_async(q); } - q->flush_queue_delayed = 0; + fq->flush_queue_delayed = 0; if (q->mq_ops) - spin_unlock_irqrestore(&q->mq_flush_lock, flags); + spin_unlock_irqrestore(&fq->mq_flush_lock, flags); } /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked + * @fq: flush queue * * Flush related states of @q have changed, consider issuing flush request. * Please read the comment at the top of this file for more info. * * CONTEXT: - * spin_lock_irq(q->queue_lock or q->mq_flush_lock) + * spin_lock_irq(q->queue_lock or fq->mq_flush_lock) * * RETURNS: * %true if flush was issued, %false otherwise. */ -static bool blk_kick_flush(struct request_queue *q) +static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) { - struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; + struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; struct request *first_rq = list_first_entry(pending, struct request, flush.list); + struct request *flush_rq = fq->flush_rq; /* C1 described at the top of this file */ - if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending)) + if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) return false; /* C2 and C3 */ - if (!list_empty(&q->flush_data_in_flight) && + if (!list_empty(&fq->flush_data_in_flight) && time_before(jiffies, - q->flush_pending_since + FLUSH_PENDING_TIMEOUT)) + fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return false; /* * Issue flush and toggle pending_idx. This makes pending_idx * different from running_idx, which means flush is in flight. */ - q->flush_pending_idx ^= 1; + fq->flush_pending_idx ^= 1; - blk_rq_init(q, q->flush_rq); - if (q->mq_ops) - blk_mq_clone_flush_request(q->flush_rq, first_rq); + blk_rq_init(q, flush_rq); + + /* + * Borrow tag from the first request since they can't + * be in flight at the same time. + */ + if (q->mq_ops) { + flush_rq->mq_ctx = first_rq->mq_ctx; + flush_rq->tag = first_rq->tag; + } - q->flush_rq->cmd_type = REQ_TYPE_FS; - q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; - q->flush_rq->rq_disk = first_rq->rq_disk; - q->flush_rq->end_io = flush_end_io; + flush_rq->cmd_type = REQ_TYPE_FS; + flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; + flush_rq->rq_disk = first_rq->rq_disk; + flush_rq->end_io = flush_end_io; - return blk_flush_queue_rq(q->flush_rq, false); + return blk_flush_queue_rq(flush_rq, false); } static void flush_data_end_io(struct request *rq, int error) { struct request_queue *q = rq->q; + struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); /* * After populating an empty queue, kick it to avoid stall. Read * the comment in flush_end_io(). */ - if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) + if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) blk_run_queue_async(q); } @@ -328,20 +340,20 @@ static void mq_flush_data_end_io(struct request *rq, int error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; + struct blk_mq_ctx *ctx = rq->mq_ctx; unsigned long flags; + struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx); - ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); /* * After populating an empty queue, kick it to avoid stall. Read * the comment in flush_end_io(). */ - spin_lock_irqsave(&q->mq_flush_lock, flags); - if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) + spin_lock_irqsave(&fq->mq_flush_lock, flags); + if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) blk_mq_run_hw_queue(hctx, true); - spin_unlock_irqrestore(&q->mq_flush_lock, flags); + spin_unlock_irqrestore(&fq->mq_flush_lock, flags); } /** @@ -361,6 +373,7 @@ void blk_insert_flush(struct request *rq) struct request_queue *q = rq->q; unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); + struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); /* * @policy now records what operations need to be done. Adjust @@ -378,7 +391,7 @@ void blk_insert_flush(struct request *rq) */ if (!policy) { if (q->mq_ops) - blk_mq_end_io(rq, 0); + blk_mq_end_request(rq, 0); else __blk_end_bidi_request(rq, 0, 0, 0); return; @@ -411,14 +424,14 @@ void blk_insert_flush(struct request *rq) if (q->mq_ops) { rq->end_io = mq_flush_data_end_io; - spin_lock_irq(&q->mq_flush_lock); - blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); - spin_unlock_irq(&q->mq_flush_lock); + spin_lock_irq(&fq->mq_flush_lock); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0); + spin_unlock_irq(&fq->mq_flush_lock); return; } rq->end_io = flush_data_end_io; - blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0); } /** @@ -474,7 +487,43 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, } EXPORT_SYMBOL(blkdev_issue_flush); -void blk_mq_init_flush(struct request_queue *q) +struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, + int node, int cmd_size) +{ + struct blk_flush_queue *fq; + int rq_sz = sizeof(struct request); + + fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node); + if (!fq) + goto fail; + + if (q->mq_ops) { + spin_lock_init(&fq->mq_flush_lock); + rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); + } + + fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); + if (!fq->flush_rq) + goto fail_rq; + + INIT_LIST_HEAD(&fq->flush_queue[0]); + INIT_LIST_HEAD(&fq->flush_queue[1]); + INIT_LIST_HEAD(&fq->flush_data_in_flight); + + return fq; + + fail_rq: + kfree(fq); + fail: + return NULL; +} + +void blk_free_flush_queue(struct blk_flush_queue *fq) { - spin_lock_init(&q->mq_flush_lock); + /* bio based request queue hasn't flush queue */ + if (!fq) + return; + + kfree(fq->flush_rq); + kfree(fq); } diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 7fbab84399e..79ffb4855af 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -154,10 +154,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) if (!b1 || !b2) return -1; - if (b1->sector_size != b2->sector_size) { - printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, - gd1->disk_name, gd2->disk_name, - b1->sector_size, b2->sector_size); + if (b1->interval != b2->interval) { + pr_err("%s: %s/%s protection interval %u != %u\n", + __func__, gd1->disk_name, gd2->disk_name, + b1->interval, b2->interval); return -1; } @@ -186,37 +186,53 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } EXPORT_SYMBOL(blk_integrity_compare); -int blk_integrity_merge_rq(struct request_queue *q, struct request *req, - struct request *next) +bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, + struct request *next) { - if (blk_integrity_rq(req) != blk_integrity_rq(next)) - return -1; + if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0) + return true; + + if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0) + return false; + + if (bio_integrity(req->bio)->bip_flags != + bio_integrity(next->bio)->bip_flags) + return false; if (req->nr_integrity_segments + next->nr_integrity_segments > q->limits.max_integrity_segments) - return -1; + return false; - return 0; + return true; } EXPORT_SYMBOL(blk_integrity_merge_rq); -int blk_integrity_merge_bio(struct request_queue *q, struct request *req, - struct bio *bio) +bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, + struct bio *bio) { int nr_integrity_segs; struct bio *next = bio->bi_next; + if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL) + return true; + + if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL) + return false; + + if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags) + return false; + bio->bi_next = NULL; nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); bio->bi_next = next; if (req->nr_integrity_segments + nr_integrity_segs > q->limits.max_integrity_segments) - return -1; + return false; req->nr_integrity_segments += nr_integrity_segs; - return 0; + return true; } EXPORT_SYMBOL(blk_integrity_merge_bio); @@ -269,42 +285,48 @@ static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page) return sprintf(page, "0\n"); } -static ssize_t integrity_read_store(struct blk_integrity *bi, - const char *page, size_t count) +static ssize_t integrity_verify_store(struct blk_integrity *bi, + const char *page, size_t count) { char *p = (char *) page; unsigned long val = simple_strtoul(p, &p, 10); if (val) - bi->flags |= INTEGRITY_FLAG_READ; + bi->flags |= BLK_INTEGRITY_VERIFY; else - bi->flags &= ~INTEGRITY_FLAG_READ; + bi->flags &= ~BLK_INTEGRITY_VERIFY; return count; } -static ssize_t integrity_read_show(struct blk_integrity *bi, char *page) +static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page) { - return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0); + return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0); } -static ssize_t integrity_write_store(struct blk_integrity *bi, - const char *page, size_t count) +static ssize_t integrity_generate_store(struct blk_integrity *bi, + const char *page, size_t count) { char *p = (char *) page; unsigned long val = simple_strtoul(p, &p, 10); if (val) - bi->flags |= INTEGRITY_FLAG_WRITE; + bi->flags |= BLK_INTEGRITY_GENERATE; else - bi->flags &= ~INTEGRITY_FLAG_WRITE; + bi->flags &= ~BLK_INTEGRITY_GENERATE; return count; } -static ssize_t integrity_write_show(struct blk_integrity *bi, char *page) +static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page) +{ + return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0); +} + +static ssize_t integrity_device_show(struct blk_integrity *bi, char *page) { - return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0); + return sprintf(page, "%u\n", + (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0); } static struct integrity_sysfs_entry integrity_format_entry = { @@ -317,23 +339,29 @@ static struct integrity_sysfs_entry integrity_tag_size_entry = { .show = integrity_tag_size_show, }; -static struct integrity_sysfs_entry integrity_read_entry = { +static struct integrity_sysfs_entry integrity_verify_entry = { .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR }, - .show = integrity_read_show, - .store = integrity_read_store, + .show = integrity_verify_show, + .store = integrity_verify_store, }; -static struct integrity_sysfs_entry integrity_write_entry = { +static struct integrity_sysfs_entry integrity_generate_entry = { .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR }, - .show = integrity_write_show, - .store = integrity_write_store, + .show = integrity_generate_show, + .store = integrity_generate_store, +}; + +static struct integrity_sysfs_entry integrity_device_entry = { + .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO }, + .show = integrity_device_show, }; static struct attribute *integrity_attrs[] = { &integrity_format_entry.attr, &integrity_tag_size_entry.attr, - &integrity_read_entry.attr, - &integrity_write_entry.attr, + &integrity_verify_entry.attr, + &integrity_generate_entry.attr, + &integrity_device_entry.attr, NULL, }; @@ -406,8 +434,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) kobject_uevent(&bi->kobj, KOBJ_ADD); - bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; - bi->sector_size = queue_logical_block_size(disk->queue); + bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE; + bi->interval = queue_logical_block_size(disk->queue); disk->integrity = bi; } else bi = disk->integrity; @@ -418,9 +446,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) bi->generate_fn = template->generate_fn; bi->verify_fn = template->verify_fn; bi->tuple_size = template->tuple_size; - bi->set_tag_fn = template->set_tag_fn; - bi->get_tag_fn = template->get_tag_fn; bi->tag_size = template->tag_size; + bi->flags |= template->flags; } else bi->name = bi_unsupported_name; diff --git a/block/blk-merge.c b/block/blk-merge.c index 77881798f79..ba99351c0f5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -97,14 +97,18 @@ void blk_recalc_rq_segments(struct request *rq) void blk_recount_segments(struct request_queue *q, struct bio *bio) { - if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && + bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, + &q->queue_flags); + + if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) && bio->bi_vcnt < queue_max_segments(q)) bio->bi_phys_segments = bio->bi_vcnt; else { struct bio *nxt = bio->bi_next; bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, + no_sg_merge); bio->bi_next = nxt; } @@ -313,7 +317,7 @@ static inline int ll_new_hw_segment(struct request_queue *q, if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) goto no_merge; - if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) + if (blk_integrity_merge_bio(q, req, bio) == false) goto no_merge; /* @@ -410,7 +414,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > queue_max_segments(q)) return 0; - if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) + if (blk_integrity_merge_rq(q, req, next) == false) return 0; /* Merge is OK... */ @@ -590,7 +594,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; /* only merge integrity protected bio into ditto rq */ - if (bio_integrity(bio) != blk_integrity_rq(rq)) + if (blk_integrity_merge_bio(rq->q, rq, bio) == false) return false; /* must be using the same buffer */ diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c1b92426c95..8317175a300 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -351,15 +351,12 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) return; wait_cnt = atomic_dec_return(&bs->wait_cnt); + if (unlikely(wait_cnt < 0)) + wait_cnt = atomic_inc_return(&bs->wait_cnt); if (wait_cnt == 0) { -wake: atomic_add(bt->wake_cnt, &bs->wait_cnt); bt_index_atomic_inc(&bt->wake_index); wake_up(&bs->wait); - } else if (wait_cnt < 0) { - wait_cnt = atomic_inc_return(&bs->wait_cnt); - if (!wait_cnt) - goto wake; } } @@ -392,45 +389,37 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, __blk_mq_put_reserved_tag(tags, tag); } -static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, - unsigned long *free_map, unsigned int off) +static void bt_for_each(struct blk_mq_hw_ctx *hctx, + struct blk_mq_bitmap_tags *bt, unsigned int off, + busy_iter_fn *fn, void *data, bool reserved) { - int i; + struct request *rq; + int bit, i; for (i = 0; i < bt->map_nr; i++) { struct blk_align_bitmap *bm = &bt->map[i]; - int bit = 0; - do { - bit = find_next_zero_bit(&bm->word, bm->depth, bit); - if (bit >= bm->depth) - break; - - __set_bit(bit + off, free_map); - bit++; - } while (1); + for (bit = find_first_bit(&bm->word, bm->depth); + bit < bm->depth; + bit = find_next_bit(&bm->word, bm->depth, bit + 1)) { + rq = blk_mq_tag_to_rq(hctx->tags, off + bit); + if (rq->q == hctx->queue) + fn(hctx, rq, data, reserved); + } off += (1 << bt->bits_per_word); } } -void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, - void (*fn)(void *, unsigned long *), void *data) +void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, + void *priv) { - unsigned long *tag_map; - size_t map_size; - - map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG; - tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC); - if (!tag_map) - return; + struct blk_mq_tags *tags = hctx->tags; - bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags); if (tags->nr_reserved_tags) - bt_for_each_free(&tags->breserved_tags, tag_map, 0); - - fn(data, tag_map); - kfree(tag_map); + bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true); + bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, + false); } EXPORT_SYMBOL(blk_mq_tag_busy_iter); @@ -463,8 +452,8 @@ static void bt_update_count(struct blk_mq_bitmap_tags *bt, } bt->wake_cnt = BT_WAIT_BATCH; - if (bt->wake_cnt > depth / 4) - bt->wake_cnt = max(1U, depth / 4); + if (bt->wake_cnt > depth / BT_WAIT_QUEUES) + bt->wake_cnt = max(1U, depth / BT_WAIT_QUEUES); bt->depth = depth; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 38f4a165640..68929bad9a6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -20,6 +20,7 @@ #include <linux/cache.h> #include <linux/sched/sysctl.h> #include <linux/delay.h> +#include <linux/crash_dump.h> #include <trace/events/block.h> @@ -223,9 +224,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_hw_ctx *hctx; struct request *rq; struct blk_mq_alloc_data alloc_data; + int ret; - if (blk_mq_queue_enter(q)) - return NULL; + ret = blk_mq_queue_enter(q); + if (ret) + return ERR_PTR(ret); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); @@ -245,6 +248,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, ctx = alloc_data.ctx; } blk_mq_put_ctx(ctx); + if (!rq) + return ERR_PTR(-EWOULDBLOCK); return rq; } EXPORT_SYMBOL(blk_mq_alloc_request); @@ -276,27 +281,7 @@ void blk_mq_free_request(struct request *rq) __blk_mq_free_request(hctx, ctx, rq); } -/* - * Clone all relevant state from a request that has been put on hold in - * the flush state machine into the preallocated flush request that hangs - * off the request queue. - * - * For a driver the flush request should be invisible, that's why we are - * impersonating the original request here. - */ -void blk_mq_clone_flush_request(struct request *flush_rq, - struct request *orig_rq) -{ - struct blk_mq_hw_ctx *hctx = - orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu); - - flush_rq->mq_ctx = orig_rq->mq_ctx; - flush_rq->tag = orig_rq->tag; - memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq), - hctx->cmd_size); -} - -inline void __blk_mq_end_io(struct request *rq, int error) +inline void __blk_mq_end_request(struct request *rq, int error) { blk_account_io_done(rq); @@ -308,15 +293,15 @@ inline void __blk_mq_end_io(struct request *rq, int error) blk_mq_free_request(rq); } } -EXPORT_SYMBOL(__blk_mq_end_io); +EXPORT_SYMBOL(__blk_mq_end_request); -void blk_mq_end_io(struct request *rq, int error) +void blk_mq_end_request(struct request *rq, int error) { if (blk_update_request(rq, error, blk_rq_bytes(rq))) BUG(); - __blk_mq_end_io(rq, error); + __blk_mq_end_request(rq, error); } -EXPORT_SYMBOL(blk_mq_end_io); +EXPORT_SYMBOL(blk_mq_end_request); static void __blk_mq_complete_request_remote(void *data) { @@ -356,7 +341,7 @@ void __blk_mq_complete_request(struct request *rq) struct request_queue *q = rq->q; if (!q->softirq_done_fn) - blk_mq_end_io(rq, rq->errors); + blk_mq_end_request(rq, rq->errors); else blk_mq_ipi_complete_request(rq); } @@ -380,7 +365,7 @@ void blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); -static void blk_mq_start_request(struct request *rq, bool last) +void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; @@ -417,35 +402,24 @@ static void blk_mq_start_request(struct request *rq, bool last) */ rq->nr_phys_segments++; } - - /* - * Flag the last request in the series so that drivers know when IO - * should be kicked off, if they don't do it on a per-request basis. - * - * Note: the flag isn't the only condition drivers should do kick off. - * If drive is busy, the last request might not have the bit set. - */ - if (last) - rq->cmd_flags |= REQ_END; } +EXPORT_SYMBOL(blk_mq_start_request); static void __blk_mq_requeue_request(struct request *rq) { struct request_queue *q = rq->q; trace_block_rq_requeue(q, rq); - clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - - rq->cmd_flags &= ~REQ_END; - if (q->dma_drain_size && blk_rq_bytes(rq)) - rq->nr_phys_segments--; + if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { + if (q->dma_drain_size && blk_rq_bytes(rq)) + rq->nr_phys_segments--; + } } void blk_mq_requeue_request(struct request *rq) { __blk_mq_requeue_request(rq); - blk_clear_rq_complete(rq); BUG_ON(blk_queued_rq(rq)); blk_mq_add_to_requeue_list(rq, true); @@ -514,78 +488,35 @@ void blk_mq_kick_requeue_list(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_kick_requeue_list); -static inline bool is_flush_request(struct request *rq, unsigned int tag) +static inline bool is_flush_request(struct request *rq, + struct blk_flush_queue *fq, unsigned int tag) { return ((rq->cmd_flags & REQ_FLUSH_SEQ) && - rq->q->flush_rq->tag == tag); + fq->flush_rq->tag == tag); } struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { struct request *rq = tags->rqs[tag]; + /* mq_ctx of flush rq is always cloned from the corresponding req */ + struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx); - if (!is_flush_request(rq, tag)) + if (!is_flush_request(rq, fq, tag)) return rq; - return rq->q->flush_rq; + return fq->flush_rq; } EXPORT_SYMBOL(blk_mq_tag_to_rq); struct blk_mq_timeout_data { - struct blk_mq_hw_ctx *hctx; - unsigned long *next; - unsigned int *next_set; + unsigned long next; + unsigned int next_set; }; -static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) +void blk_mq_rq_timed_out(struct request *req, bool reserved) { - struct blk_mq_timeout_data *data = __data; - struct blk_mq_hw_ctx *hctx = data->hctx; - unsigned int tag; - - /* It may not be in flight yet (this is where - * the REQ_ATOMIC_STARTED flag comes in). The requests are - * statically allocated, so we know it's always safe to access the - * memory associated with a bit offset into ->rqs[]. - */ - tag = 0; - do { - struct request *rq; - - tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag); - if (tag >= hctx->tags->nr_tags) - break; - - rq = blk_mq_tag_to_rq(hctx->tags, tag++); - if (rq->q != hctx->queue) - continue; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) - continue; - - blk_rq_check_expired(rq, data->next, data->next_set); - } while (1); -} - -static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx, - unsigned long *next, - unsigned int *next_set) -{ - struct blk_mq_timeout_data data = { - .hctx = hctx, - .next = next, - .next_set = next_set, - }; - - /* - * Ask the tagging code to iterate busy requests, so we can - * check them for timeout. - */ - blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data); -} - -static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq) -{ - struct request_queue *q = rq->q; + struct blk_mq_ops *ops = req->q->mq_ops; + enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; /* * We know that complete is set at this point. If STARTED isn't set @@ -596,21 +527,54 @@ static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq) * we both flags will get cleared. So check here again, and ignore * a timeout event with a request that isn't active. */ - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) - return BLK_EH_NOT_HANDLED; + if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags)) + return; + + if (ops->timeout) + ret = ops->timeout(req, reserved); + + switch (ret) { + case BLK_EH_HANDLED: + __blk_mq_complete_request(req); + break; + case BLK_EH_RESET_TIMER: + blk_add_timer(req); + blk_clear_rq_complete(req); + break; + case BLK_EH_NOT_HANDLED: + break; + default: + printk(KERN_ERR "block: bad eh return: %d\n", ret); + break; + } +} + +static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + struct blk_mq_timeout_data *data = priv; - if (!q->mq_ops->timeout) - return BLK_EH_RESET_TIMER; + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + return; - return q->mq_ops->timeout(rq); + if (time_after_eq(jiffies, rq->deadline)) { + if (!blk_mark_rq_complete(rq)) + blk_mq_rq_timed_out(rq, reserved); + } else if (!data->next_set || time_after(data->next, rq->deadline)) { + data->next = rq->deadline; + data->next_set = 1; + } } -static void blk_mq_rq_timer(unsigned long data) +static void blk_mq_rq_timer(unsigned long priv) { - struct request_queue *q = (struct request_queue *) data; + struct request_queue *q = (struct request_queue *)priv; + struct blk_mq_timeout_data data = { + .next = 0, + .next_set = 0, + }; struct blk_mq_hw_ctx *hctx; - unsigned long next = 0; - int i, next_set = 0; + int i; queue_for_each_hw_ctx(q, hctx, i) { /* @@ -620,12 +584,12 @@ static void blk_mq_rq_timer(unsigned long data) if (!hctx->nr_ctx || !hctx->tags) continue; - blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); + blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data); } - if (next_set) { - next = blk_rq_timeout(round_jiffies_up(next)); - mod_timer(&q->timeout, next); + if (data.next_set) { + data.next = blk_rq_timeout(round_jiffies_up(data.next)); + mod_timer(&q->timeout, data.next); } else { queue_for_each_hw_ctx(q, hctx, i) blk_mq_tag_idle(hctx); @@ -751,9 +715,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) rq = list_first_entry(&rq_list, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_start_request(rq, list_empty(&rq_list)); - - ret = q->mq_ops->queue_rq(hctx, rq); + ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list)); switch (ret) { case BLK_MQ_RQ_QUEUE_OK: queued++; @@ -766,7 +728,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: rq->errors = -EIO; - blk_mq_end_io(rq, rq->errors); + blk_mq_end_request(rq, rq->errors); break; } @@ -1194,14 +1156,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) int ret; blk_mq_bio_to_request(rq, bio); - blk_mq_start_request(rq, true); /* * For OK queue, we are done. For error, kill it. Any other * error (busy), just add it to our list as we previously * would have done */ - ret = q->mq_ops->queue_rq(data.hctx, rq); + ret = q->mq_ops->queue_rq(data.hctx, rq, true); if (ret == BLK_MQ_RQ_QUEUE_OK) goto done; else { @@ -1209,7 +1170,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (ret == BLK_MQ_RQ_QUEUE_ERROR) { rq->errors = -EIO; - blk_mq_end_io(rq, rq->errors); + blk_mq_end_request(rq, rq->errors); goto done; } } @@ -1531,6 +1492,28 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, return NOTIFY_OK; } +static void blk_mq_exit_hctx(struct request_queue *q, + struct blk_mq_tag_set *set, + struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + unsigned flush_start_tag = set->queue_depth; + + blk_mq_tag_idle(hctx); + + if (set->ops->exit_request) + set->ops->exit_request(set->driver_data, + hctx->fq->flush_rq, hctx_idx, + flush_start_tag + hctx_idx); + + if (set->ops->exit_hctx) + set->ops->exit_hctx(hctx, hctx_idx); + + blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); + blk_free_flush_queue(hctx->fq); + kfree(hctx->ctxs); + blk_mq_free_bitmap(&hctx->ctx_map); +} + static void blk_mq_exit_hw_queues(struct request_queue *q, struct blk_mq_tag_set *set, int nr_queue) { @@ -1540,17 +1523,8 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; - - blk_mq_tag_idle(hctx); - - if (set->ops->exit_hctx) - set->ops->exit_hctx(hctx, i); - - blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); - kfree(hctx->ctxs); - blk_mq_free_bitmap(&hctx->ctx_map); + blk_mq_exit_hctx(q, set, hctx, i); } - } static void blk_mq_free_hw_queues(struct request_queue *q, @@ -1565,53 +1539,88 @@ static void blk_mq_free_hw_queues(struct request_queue *q, } } -static int blk_mq_init_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) +static int blk_mq_init_hctx(struct request_queue *q, + struct blk_mq_tag_set *set, + struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { - struct blk_mq_hw_ctx *hctx; - unsigned int i; + int node; + unsigned flush_start_tag = set->queue_depth; + + node = hctx->numa_node; + if (node == NUMA_NO_NODE) + node = hctx->numa_node = set->numa_node; + + INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); + INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); + spin_lock_init(&hctx->lock); + INIT_LIST_HEAD(&hctx->dispatch); + hctx->queue = q; + hctx->queue_num = hctx_idx; + hctx->flags = set->flags; + hctx->cmd_size = set->cmd_size; + + blk_mq_init_cpu_notifier(&hctx->cpu_notifier, + blk_mq_hctx_notify, hctx); + blk_mq_register_cpu_notifier(&hctx->cpu_notifier); + + hctx->tags = set->tags[hctx_idx]; /* - * Initialize hardware queues + * Allocate space for all possible cpus to avoid allocation at + * runtime */ - queue_for_each_hw_ctx(q, hctx, i) { - int node; + hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), + GFP_KERNEL, node); + if (!hctx->ctxs) + goto unregister_cpu_notifier; - node = hctx->numa_node; - if (node == NUMA_NO_NODE) - node = hctx->numa_node = set->numa_node; + if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) + goto free_ctxs; - INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); - INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); - spin_lock_init(&hctx->lock); - INIT_LIST_HEAD(&hctx->dispatch); - hctx->queue = q; - hctx->queue_num = i; - hctx->flags = set->flags; - hctx->cmd_size = set->cmd_size; + hctx->nr_ctx = 0; - blk_mq_init_cpu_notifier(&hctx->cpu_notifier, - blk_mq_hctx_notify, hctx); - blk_mq_register_cpu_notifier(&hctx->cpu_notifier); + if (set->ops->init_hctx && + set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) + goto free_bitmap; - hctx->tags = set->tags[i]; + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); + if (!hctx->fq) + goto exit_hctx; - /* - * Allocate space for all possible cpus to avoid allocation at - * runtime - */ - hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), - GFP_KERNEL, node); - if (!hctx->ctxs) - break; + if (set->ops->init_request && + set->ops->init_request(set->driver_data, + hctx->fq->flush_rq, hctx_idx, + flush_start_tag + hctx_idx, node)) + goto free_fq; - if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) - break; + return 0; - hctx->nr_ctx = 0; + free_fq: + kfree(hctx->fq); + exit_hctx: + if (set->ops->exit_hctx) + set->ops->exit_hctx(hctx, hctx_idx); + free_bitmap: + blk_mq_free_bitmap(&hctx->ctx_map); + free_ctxs: + kfree(hctx->ctxs); + unregister_cpu_notifier: + blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); + + return -1; +} + +static int blk_mq_init_hw_queues(struct request_queue *q, + struct blk_mq_tag_set *set) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; - if (set->ops->init_hctx && - set->ops->init_hctx(hctx, set->driver_data, i)) + /* + * Initialize hardware queues + */ + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_init_hctx(q, set, hctx, i)) break; } @@ -1765,6 +1774,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!ctx) return ERR_PTR(-ENOMEM); + /* + * If a crashdump is active, then we are potentially in a very + * memory constrained environment. Limit us to 1 queue and + * 64 tags to prevent using too much memory. + */ + if (is_kdump_kernel()) { + set->nr_hw_queues = 1; + set->queue_depth = min(64U, set->queue_depth); + } + hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, set->numa_node); @@ -1783,7 +1802,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!hctxs[i]) goto err_hctxs; - if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, + node)) goto err_hctxs; atomic_set(&hctxs[i]->nr_active, 0); @@ -1830,7 +1850,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) else blk_queue_make_request(q, blk_sq_make_request); - blk_queue_rq_timed_out(q, blk_mq_rq_timed_out); if (set->timeout) blk_queue_rq_timeout(q, set->timeout); @@ -1842,17 +1861,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (set->ops->complete) blk_queue_softirq_done(q, set->ops->complete); - blk_mq_init_flush(q); blk_mq_init_cpu_queues(q, set->nr_hw_queues); - q->flush_rq = kzalloc(round_up(sizeof(struct request) + - set->cmd_size, cache_line_size()), - GFP_KERNEL); - if (!q->flush_rq) - goto err_hw; - if (blk_mq_init_hw_queues(q, set)) - goto err_flush_rq; + goto err_hw; mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); @@ -1864,8 +1876,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; -err_flush_rq: - kfree(q->flush_rq); err_hw: blk_cleanup_queue(q); err_hctxs: diff --git a/block/blk-mq.h b/block/blk-mq.h index ca4964a6295..d567d5283ff 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -27,7 +27,6 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); -void blk_mq_init_flush(struct request_queue *q); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, @@ -60,6 +59,8 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); +extern void blk_mq_rq_timed_out(struct request *req, bool reserved); + /* * Basic implementation of sparser bitmap, allowing the user to spread * the bits over more cachelines. diff --git a/block/blk-settings.c b/block/blk-settings.c index f1a1795a568..aa02247d227 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -574,7 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, bottom = max(b->physical_block_size, b->io_min) + alignment; /* Verify that top and bottom intervals line up */ - if (max(top, bottom) & (min(top, bottom) - 1)) { + if (max(top, bottom) % min(top, bottom)) { t->misaligned = 1; ret = -1; } @@ -619,7 +619,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) - & (max(t->physical_block_size, t->io_min) - 1); + % max(t->physical_block_size, t->io_min); /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 521ae9089c5..1fac4340891 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -519,8 +519,8 @@ static void blk_release_queue(struct kobject *kobj) if (q->mq_ops) blk_mq_free_queue(q); - - kfree(q->flush_rq); + else + blk_free_flush_queue(q->fq); blk_trace_shutdown(q); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 95a09590ccf..56c025894cd 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -90,10 +90,7 @@ static void blk_rq_timed_out(struct request *req) switch (ret) { case BLK_EH_HANDLED: /* Can we use req->errors here? */ - if (q->mq_ops) - __blk_mq_complete_request(req); - else - __blk_complete_request(req); + __blk_complete_request(req); break; case BLK_EH_RESET_TIMER: blk_add_timer(req); @@ -113,7 +110,7 @@ static void blk_rq_timed_out(struct request *req) } } -void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, +static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, unsigned int *next_set) { if (time_after_eq(jiffies, rq->deadline)) { @@ -162,7 +159,10 @@ void blk_abort_request(struct request *req) if (blk_mark_rq_complete(req)) return; blk_delete_timer(req); - blk_rq_timed_out(req); + if (req->q->mq_ops) + blk_mq_rq_timed_out(req, false); + else + blk_rq_timed_out(req); } EXPORT_SYMBOL_GPL(blk_abort_request); @@ -190,7 +190,8 @@ void blk_add_timer(struct request *req) struct request_queue *q = req->q; unsigned long expiry; - if (!q->rq_timed_out_fn) + /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ + if (!q->mq_ops && !q->rq_timed_out_fn) return; BUG_ON(!list_empty(&req->timeout_list)); diff --git a/block/blk.h b/block/blk.h index 6748c4f8d7a..43b03618571 100644 --- a/block/blk.h +++ b/block/blk.h @@ -2,6 +2,8 @@ #define BLK_INTERNAL_H #include <linux/idr.h> +#include <linux/blk-mq.h> +#include "blk-mq.h" /* Amount of time in which a process may batch requests */ #define BLK_BATCH_TIME (HZ/50UL) @@ -12,16 +14,44 @@ /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + spinlock_t mq_flush_lock; +}; + extern struct kmem_cache *blk_requestq_cachep; extern struct kmem_cache *request_cachep; extern struct kobj_type blk_queue_ktype; extern struct ida blk_queue_ida; +static inline struct blk_flush_queue *blk_get_flush_queue( + struct request_queue *q, struct blk_mq_ctx *ctx) +{ + struct blk_mq_hw_ctx *hctx; + + if (!q->mq_ops) + return q->fq; + + hctx = q->mq_ops->map_queue(q, ctx->cpu); + + return hctx->fq; +} + static inline void __blk_get_queue(struct request_queue *q) { kobject_get(&q->kobj); } +struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, + int node, int cmd_size); +void blk_free_flush_queue(struct blk_flush_queue *q); + int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask); void blk_exit_rl(struct request_list *rl); @@ -38,8 +68,6 @@ bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); -void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, - unsigned int *next_set); unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); void blk_delete_timer(struct request *); @@ -88,6 +116,7 @@ void blk_insert_flush(struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; + struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); while (1) { if (!list_empty(&q->queue_head)) { @@ -110,9 +139,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) * should be restarted later. Please see flush_end_io() for * details. */ - if (q->flush_pending_idx != q->flush_running_idx && + if (fq->flush_pending_idx != fq->flush_running_idx && !queue_flush_queueable(q)) { - q->flush_queue_delayed = 1; + fq->flush_queue_delayed = 1; return NULL; } if (unlikely(blk_queue_bypass(q)) || diff --git a/block/bsg.c b/block/bsg.c index ff46addde5d..276e869e686 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -270,8 +270,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, * map scatter-gather elements separately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); - if (!rq) - return ERR_PTR(-ENOMEM); + if (IS_ERR(rq)) + return rq; blk_rq_set_block_pc(rq); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); @@ -285,8 +285,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, } next_rq = blk_get_request(q, READ, GFP_KERNEL); - if (!next_rq) { - ret = -ENOMEM; + if (IS_ERR(next_rq)) { + ret = PTR_ERR(next_rq); + next_rq = NULL; goto out; } rq->next_rq = next_rq; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3f31cf9508e..6f2751d305d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -299,7 +299,7 @@ struct cfq_io_cq { struct cfq_ttime ttime; int ioprio; /* the current ioprio */ #ifdef CONFIG_CFQ_GROUP_IOSCHED - uint64_t blkcg_id; /* the current blkcg ID */ + uint64_t blkcg_serial_nr; /* the current blkcg serial */ #endif }; @@ -3547,17 +3547,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *sync_cfqq; - uint64_t id; + uint64_t serial_nr; rcu_read_lock(); - id = bio_blkcg(bio)->id; + serial_nr = bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ - if (unlikely(!cfqd) || likely(cic->blkcg_id == id)) + if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr)) return; sync_cfqq = cic_to_cfqq(cic, 1); @@ -3571,7 +3571,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) cfq_put_queue(sync_cfqq); } - cic->blkcg_id = id; + cic->blkcg_serial_nr = serial_nr; } #else static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 18b282ce361..f678c733df4 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return compat_put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: /* compatible */ @@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: diff --git a/block/ioctl.c b/block/ioctl.c index d6cda8147c9..6c7bf903742 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); @@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: diff --git a/block/partitions/mac.c b/block/partitions/mac.c index 76d8ba6379a..c2c48ec64b2 100644 --- a/block/partitions/mac.c +++ b/block/partitions/mac.c @@ -81,7 +81,7 @@ int mac_partition(struct parsed_partitions *state) be32_to_cpu(part->start_block) * (secsize/512), be32_to_cpu(part->block_count) * (secsize/512)); - if (!strnicmp(part->type, "Linux_RAID", 10)) + if (!strncasecmp(part->type, "Linux_RAID", 10)) state->parts[slot].flags = ADDPART_FLAG_RAID; #ifdef CONFIG_PPC_PMAC /* @@ -100,7 +100,7 @@ int mac_partition(struct parsed_partitions *state) goodness++; if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0 - || (strnicmp(part->type, "Linux", 5) == 0 + || (strncasecmp(part->type, "Linux", 5) == 0 && strcasecmp(part->type, "Linux_swap") != 0)) { int i, l; @@ -109,13 +109,13 @@ int mac_partition(struct parsed_partitions *state) if (strcmp(part->name, "/") == 0) goodness++; for (i = 0; i <= l - 4; ++i) { - if (strnicmp(part->name + i, "root", + if (strncasecmp(part->name + i, "root", 4) == 0) { goodness += 2; break; } } - if (strnicmp(part->name, "swap", 4) == 0) + if (strncasecmp(part->name, "swap", 4) == 0) goodness--; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 9b8eaeca6a7..abb2e65b24c 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -316,8 +316,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, ret = -ENOMEM; rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); - if (!rq) - goto out; + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (hdr->cmd_len > BLK_MAX_CDB) { @@ -387,7 +387,6 @@ out_free_cdb: kfree(rq->cmd); out_put_request: blk_put_request(rq); -out: return ret; } @@ -457,8 +456,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); - if (!rq) { - err = -ENOMEM; + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto error; } blk_rq_set_block_pc(rq); @@ -548,6 +547,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; rq->cmd[0] = cmd; diff --git a/block/t10-pi.c b/block/t10-pi.c new file mode 100644 index 00000000000..24d6e971531 --- /dev/null +++ b/block/t10-pi.c @@ -0,0 +1,197 @@ +/* + * t10_pi.c - Functions for generating and verifying T10 Protection + * Information. + * + * Copyright (C) 2007, 2008, 2014 Oracle Corporation + * Written by: Martin K. Petersen <martin.petersen@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. + * + */ + +#include <linux/t10-pi.h> +#include <linux/blkdev.h> +#include <linux/crc-t10dif.h> +#include <net/checksum.h> + +typedef __be16 (csum_fn) (void *, unsigned int); + +static const __be16 APP_ESCAPE = (__force __be16) 0xffff; +static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff; + +static __be16 t10_pi_crc_fn(void *data, unsigned int len) +{ + return cpu_to_be16(crc_t10dif(data, len)); +} + +static __be16 t10_pi_ip_fn(void *data, unsigned int len) +{ + return (__force __be16)ip_compute_csum(data, len); +} + +/* + * Type 1 and Type 2 protection use the same format: 16 bit guard tag, + * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref + * tag. + */ +static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, + unsigned int type) +{ + unsigned int i; + + for (i = 0 ; i < iter->data_size ; i += iter->interval) { + struct t10_pi_tuple *pi = iter->prot_buf; + + pi->guard_tag = fn(iter->data_buf, iter->interval); + pi->app_tag = 0; + + if (type == 1) + pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed)); + else + pi->ref_tag = 0; + + iter->data_buf += iter->interval; + iter->prot_buf += sizeof(struct t10_pi_tuple); + iter->seed++; + } + + return 0; +} + +static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, + unsigned int type) +{ + unsigned int i; + + for (i = 0 ; i < iter->data_size ; i += iter->interval) { + struct t10_pi_tuple *pi = iter->prot_buf; + __be16 csum; + + switch (type) { + case 1: + case 2: + if (pi->app_tag == APP_ESCAPE) + goto next; + + if (be32_to_cpu(pi->ref_tag) != + lower_32_bits(iter->seed)) { + pr_err("%s: ref tag error at location %llu " \ + "(rcvd %u)\n", iter->disk_name, + (unsigned long long) + iter->seed, be32_to_cpu(pi->ref_tag)); + return -EILSEQ; + } + break; + case 3: + if (pi->app_tag == APP_ESCAPE && + pi->ref_tag == REF_ESCAPE) + goto next; + break; + } + + csum = fn(iter->data_buf, iter->interval); + + if (pi->guard_tag != csum) { + pr_err("%s: guard tag error at sector %llu " \ + "(rcvd %04x, want %04x)\n", iter->disk_name, + (unsigned long long)iter->seed, + be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); + return -EILSEQ; + } + +next: + iter->data_buf += iter->interval; + iter->prot_buf += sizeof(struct t10_pi_tuple); + iter->seed++; + } + + return 0; +} + +static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) +{ + return t10_pi_generate(iter, t10_pi_crc_fn, 1); +} + +static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) +{ + return t10_pi_generate(iter, t10_pi_ip_fn, 1); +} + +static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) +{ + return t10_pi_verify(iter, t10_pi_crc_fn, 1); +} + +static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) +{ + return t10_pi_verify(iter, t10_pi_ip_fn, 1); +} + +static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) +{ + return t10_pi_generate(iter, t10_pi_crc_fn, 3); +} + +static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) +{ + return t10_pi_generate(iter, t10_pi_ip_fn, 3); +} + +static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) +{ + return t10_pi_verify(iter, t10_pi_crc_fn, 3); +} + +static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) +{ + return t10_pi_verify(iter, t10_pi_ip_fn, 3); +} + +struct blk_integrity t10_pi_type1_crc = { + .name = "T10-DIF-TYPE1-CRC", + .generate_fn = t10_pi_type1_generate_crc, + .verify_fn = t10_pi_type1_verify_crc, + .tuple_size = sizeof(struct t10_pi_tuple), + .tag_size = 0, +}; +EXPORT_SYMBOL(t10_pi_type1_crc); + +struct blk_integrity t10_pi_type1_ip = { + .name = "T10-DIF-TYPE1-IP", + .generate_fn = t10_pi_type1_generate_ip, + .verify_fn = t10_pi_type1_verify_ip, + .tuple_size = sizeof(struct t10_pi_tuple), + .tag_size = 0, +}; +EXPORT_SYMBOL(t10_pi_type1_ip); + +struct blk_integrity t10_pi_type3_crc = { + .name = "T10-DIF-TYPE3-CRC", + .generate_fn = t10_pi_type3_generate_crc, + .verify_fn = t10_pi_type3_verify_crc, + .tuple_size = sizeof(struct t10_pi_tuple), + .tag_size = 0, +}; +EXPORT_SYMBOL(t10_pi_type3_crc); + +struct blk_integrity t10_pi_type3_ip = { + .name = "T10-DIF-TYPE3-IP", + .generate_fn = t10_pi_type3_generate_ip, + .verify_fn = t10_pi_type3_verify_ip, + .tuple_size = sizeof(struct t10_pi_tuple), + .tag_size = 0, +}; +EXPORT_SYMBOL(t10_pi_type3_ip); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index fa7d701933b..93eaf8d9449 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2614,27 +2614,7 @@ static struct pci_driver lanai_driver = { .probe = lanai_init_one, }; -static int __init lanai_module_init(void) -{ - int x; - - x = pci_register_driver(&lanai_driver); - if (x != 0) - printk(KERN_ERR DEV_LABEL ": no adapter found\n"); - return x; -} - -static void __exit lanai_module_exit(void) -{ - /* We'll only get called when all the interfaces are already - * gone, so there isn't much to do - */ - DPRINTK("cleanup_module()\n"); - pci_unregister_driver(&lanai_driver); -} - -module_init(lanai_module_init); -module_exit(lanai_module_exit); +module_pci_driver(lanai_driver); MODULE_AUTHOR("Mitchell Blank Jr <mitch@sfgoth.com>"); MODULE_DESCRIPTION("Efficient Networks Speedstream 3010 driver"); diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index d26a3fa6368..a2dfa169237 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -158,14 +158,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if (bio_add_page(bio, device->md_io.page, size, 0) != size) goto out; bio->bi_private = device; - bio->bi_end_io = drbd_md_io_complete; + bio->bi_end_io = drbd_md_endio; bio->bi_rw = rw; if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL) /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ ; else if (!get_ldev_if_state(device, D_ATTACHING)) { - /* Corresponding put_ldev in drbd_md_io_complete() */ + /* Corresponding put_ldev in drbd_md_endio() */ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); err = -ENODEV; goto out; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 426c97aef90..434c77dcc99 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -941,7 +941,7 @@ static void drbd_bm_aio_ctx_destroy(struct kref *kref) } /* bv_page may be a copy, or may be the original */ -static void bm_async_io_complete(struct bio *bio, int error) +static void drbd_bm_endio(struct bio *bio, int error) { struct drbd_bm_aio_ctx *ctx = bio->bi_private; struct drbd_device *device = ctx->device; @@ -1027,7 +1027,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; - bio->bi_end_io = bm_async_io_complete; + bio->bi_end_io = drbd_bm_endio; if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio->bi_rw |= rw; @@ -1125,7 +1125,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned } /* - * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * We initialize ctx->in_flight to one to make sure drbd_bm_endio * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. * If all IO is done already (or nothing had been submitted), there is diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index 5c20b18540b..900d4d3272d 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -695,7 +695,7 @@ static void resync_dump_detail(struct seq_file *m, struct lc_element *e) { struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); - seq_printf(m, "%5d %s %s %s\n", bme->rs_left, + seq_printf(m, "%5d %s %s %s", bme->rs_left, test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------", test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------", test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------" diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1a000016ccd..9b22f8f01b5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -61,8 +61,6 @@ # define __must_hold(x) #endif -#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0) - /* module parameter, defined in drbd_main.c */ extern unsigned int minor_count; extern bool disable_sendpage; @@ -1483,7 +1481,7 @@ extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ /* bi_end_io handlers */ -extern void drbd_md_io_complete(struct bio *bio, int error); +extern void drbd_md_endio(struct bio *bio, int error); extern void drbd_peer_request_endio(struct bio *bio, int error); extern void drbd_request_endio(struct bio *bio, int error); extern int drbd_worker(struct drbd_thread *thi); @@ -2100,16 +2098,19 @@ static inline bool is_sync_state(enum drbd_conns connection_state) /** * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev - * @M: DRBD device. + * @_device: DRBD device. + * @_min_state: Minimum device state required for success. * * You have to call put_ldev() when finished working with device->ldev. */ -#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) -#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) +#define get_ldev_if_state(_device, _min_state) \ + (_get_ldev_if_state((_device), (_min_state)) ? \ + ({ __acquire(x); true; }) : false) +#define get_ldev(_device) get_ldev_if_state(_device, D_INCONSISTENT) static inline void put_ldev(struct drbd_device *device) { - enum drbd_disk_state ds = device->state.disk; + enum drbd_disk_state disk_state = device->state.disk; /* We must check the state *before* the atomic_dec becomes visible, * or we have a theoretical race where someone hitting zero, * while state still D_FAILED, will then see D_DISKLESS in the @@ -2122,10 +2123,10 @@ static inline void put_ldev(struct drbd_device *device) __release(local); D_ASSERT(device, i >= 0); if (i == 0) { - if (ds == D_DISKLESS) + if (disk_state == D_DISKLESS) /* even internal references gone, safe to destroy */ drbd_device_post_work(device, DESTROY_DISK); - if (ds == D_FAILED) + if (disk_state == D_FAILED) /* all application IO references gone. */ if (!test_and_set_bit(GOING_DISKLESS, &device->flags)) drbd_device_post_work(device, GO_DISKLESS); diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 89c497c630b..51b25ad8525 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -37,40 +37,8 @@ compute_subtree_last(struct drbd_interval *node) return max; } -static void augment_propagate(struct rb_node *rb, struct rb_node *stop) -{ - while (rb != stop) { - struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb); - sector_t subtree_last = compute_subtree_last(node); - if (node->end == subtree_last) - break; - node->end = subtree_last; - rb = rb_parent(&node->rb); - } -} - -static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) -{ - struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb); - struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb); - - new->end = old->end; -} - -static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) -{ - struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb); - struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb); - - new->end = old->end; - old->end = compute_subtree_last(old); -} - -static const struct rb_augment_callbacks augment_callbacks = { - augment_propagate, - augment_copy, - augment_rotate, -}; +RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb, + sector_t, end, compute_subtree_last); /** * drbd_insert_interval - insert a new interval into a tree @@ -79,6 +47,7 @@ bool drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) { struct rb_node **new = &root->rb_node, *parent = NULL; + sector_t this_end = this->sector + (this->size >> 9); BUG_ON(!IS_ALIGNED(this->size, 512)); @@ -87,6 +56,8 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) rb_entry(*new, struct drbd_interval, rb); parent = *new; + if (here->end < this_end) + here->end = this_end; if (this->sector < here->sector) new = &(*new)->rb_left; else if (this->sector > here->sector) @@ -99,6 +70,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) return false; } + this->end = this_end; rb_link_node(&this->rb, parent, new); rb_insert_augmented(&this->rb, root, &augment_callbacks); return true; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9b465bb6848..973c185c9cf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1622,13 +1622,13 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * struct drbd_socket *sock; struct p_data *p; unsigned int dp_flags = 0; - int dgs; + int digest_size; int err; sock = &peer_device->connection->data; p = drbd_prepare_command(peer_device, sock); - dgs = peer_device->connection->integrity_tfm ? - crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; + digest_size = peer_device->connection->integrity_tfm ? + crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -1659,9 +1659,9 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * /* our digest is still only over the payload. * TRIM does not carry any payload. */ - if (dgs) + if (digest_size) drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1); - err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); + err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size); if (!err) { /* For protocol A, we have to memcpy the payload into * socket buffers, as we may complete right away @@ -1674,23 +1674,23 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * * out ok after sending on this side, but does not fit on the * receiving side, we sure have detected corruption elsewhere. */ - if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs) + if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || digest_size) err = _drbd_send_bio(peer_device, req->master_bio); else err = _drbd_send_zc_bio(peer_device, req->master_bio); /* double check digest, sometimes buffers have been modified in flight. */ - if (dgs > 0 && dgs <= 64) { + if (digest_size > 0 && digest_size <= 64) { /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest); - if (memcmp(p + 1, digest, dgs)) { + if (memcmp(p + 1, digest, digest_size)) { drbd_warn(device, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", (unsigned long long)req->i.sector, req->i.size); } - } /* else if (dgs > 64) { + } /* else if (digest_size > 64) { ... Be noisy about digest too large ... } */ } @@ -1711,13 +1711,13 @@ int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, struct drbd_socket *sock; struct p_data *p; int err; - int dgs; + int digest_size; sock = &peer_device->connection->data; p = drbd_prepare_command(peer_device, sock); - dgs = peer_device->connection->integrity_tfm ? - crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; + digest_size = peer_device->connection->integrity_tfm ? + crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -1725,9 +1725,9 @@ int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, p->block_id = peer_req->block_id; p->seq_num = 0; /* unused */ p->dp_flags = 0; - if (dgs) + if (digest_size) drbd_csum_ee(peer_device->connection->integrity_tfm, peer_req, p + 1); - err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); + err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*p) + digest_size, NULL, peer_req->i.size); if (!err) err = _drbd_send_zc_ee(peer_device, peer_req); mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 06e6147c760..3b10fa6cb03 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -142,10 +142,12 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se (unsigned long) Bit2KB(rs_left >> 10), (unsigned long) Bit2KB(rs_total >> 10)); else - seq_printf(seq, "(%lu/%lu)K\n\t", + seq_printf(seq, "(%lu/%lu)K", (unsigned long) Bit2KB(rs_left), (unsigned long) Bit2KB(rs_total)); + seq_printf(seq, "\n\t"); + /* see drivers/md/md.c * We do not want to overflow, so the order of operands and * the * 100 / 100 trick are important. We do a +1 to be diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9342b8da73a..6960fb06473 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1371,9 +1371,9 @@ int drbd_submit_peer_request(struct drbd_device *device, struct bio *bio; struct page *page = peer_req->pages; sector_t sector = peer_req->i.sector; - unsigned ds = peer_req->i.size; + unsigned data_size = peer_req->i.size; unsigned n_bios = 0; - unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; + unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { @@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device, list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, ds >> 9, GFP_NOIO)) + sector, data_size >> 9, GFP_NOIO)) peer_req->flags |= EE_WAS_ERROR; drbd_endio_write_sec_final(peer_req); return 0; @@ -1426,12 +1426,12 @@ next_bio: ++n_bios; if (rw & REQ_DISCARD) { - bio->bi_iter.bi_size = ds; + bio->bi_iter.bi_size = data_size; goto submit; } page_chain_for_each(page) { - unsigned len = min_t(unsigned, ds, PAGE_SIZE); + unsigned len = min_t(unsigned, data_size, PAGE_SIZE); if (!bio_add_page(bio, page, len, 0)) { /* A single page must always be possible! * But in case it fails anyways, @@ -1446,11 +1446,11 @@ next_bio: } goto next_bio; } - ds -= len; + data_size -= len; sector += len >> 9; --nr_pages; } - D_ASSERT(device, ds == 0); + D_ASSERT(device, data_size == 0); submit: D_ASSERT(device, page == NULL); @@ -1591,24 +1591,24 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, const sector_t capacity = drbd_get_capacity(device->this_bdev); struct drbd_peer_request *peer_req; struct page *page; - int dgs, ds, err; - unsigned int data_size = pi->size; + int digest_size, err; + unsigned int data_size = pi->size, ds; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; - dgs = 0; + digest_size = 0; if (!trim && peer_device->connection->peer_integrity_tfm) { - dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); + digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); /* * FIXME: Receive the incoming digest into the receive buffer * here, together with its struct p_data? */ - err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs); + err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); if (err) return NULL; - data_size -= dgs; + data_size -= digest_size; } if (trim) { @@ -1661,16 +1661,16 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, ds -= len; } - if (dgs) { + if (digest_size) { drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv); - if (memcmp(dig_in, dig_vv, dgs)) { + if (memcmp(dig_in, dig_vv, digest_size)) { drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); drbd_free_peer_req(device, peer_req); return NULL; } } - device->recv_cnt += data_size>>9; + device->recv_cnt += data_size >> 9; return peer_req; } @@ -1708,17 +1708,17 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req struct bio_vec bvec; struct bvec_iter iter; struct bio *bio; - int dgs, err, expect; + int digest_size, err, expect; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; - dgs = 0; + digest_size = 0; if (peer_device->connection->peer_integrity_tfm) { - dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); - err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs); + digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); + err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); if (err) return err; - data_size -= dgs; + data_size -= digest_size; } /* optimistically update recv_cnt. if receiving fails below, @@ -1738,9 +1738,9 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req data_size -= expect; } - if (dgs) { + if (digest_size) { drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv); - if (memcmp(dig_in, dig_vv, dgs)) { + if (memcmp(dig_in, dig_vv, digest_size)) { drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n"); return -EINVAL; } @@ -5561,6 +5561,7 @@ int drbd_asender(struct drbd_thread *thi) * rv < expected: "woken" by signal during receive * rv == 0 : "connection shut down by peer" */ +received_more: if (likely(rv > 0)) { received += rv; buf += rv; @@ -5636,6 +5637,11 @@ int drbd_asender(struct drbd_thread *thi) expect = header_size; cmd = NULL; } + if (test_bit(SEND_PING, &connection->flags)) + continue; + rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT); + if (rv > 0) + goto received_more; } if (0) { diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c67717d572d..5a01c53ddde 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1645,6 +1645,6 @@ void request_timer_fn(unsigned long data) ? oldest_submit_jif + dt : now + et; nt = time_before(ent, dt) ? ent : dt; out: - spin_unlock_irq(&connection->resource->req_lock); + spin_unlock_irq(&device->resource->req_lock); mod_timer(&device->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c35c0f001bb..84b11f887d7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -136,50 +136,50 @@ enum drbd_role conn_highest_peer(struct drbd_connection *connection) enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection) { - enum drbd_disk_state ds = D_DISKLESS; + enum drbd_disk_state disk_state = D_DISKLESS; struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; - ds = max_t(enum drbd_disk_state, ds, device->state.disk); + disk_state = max_t(enum drbd_disk_state, disk_state, device->state.disk); } rcu_read_unlock(); - return ds; + return disk_state; } enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection) { - enum drbd_disk_state ds = D_MASK; + enum drbd_disk_state disk_state = D_MASK; struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; - ds = min_t(enum drbd_disk_state, ds, device->state.disk); + disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk); } rcu_read_unlock(); - return ds; + return disk_state; } enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection) { - enum drbd_disk_state ds = D_DISKLESS; + enum drbd_disk_state disk_state = D_DISKLESS; struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; - ds = max_t(enum drbd_disk_state, ds, device->state.pdsk); + disk_state = max_t(enum drbd_disk_state, disk_state, device->state.pdsk); } rcu_read_unlock(); - return ds; + return disk_state; } enum drbd_conns conn_lowest_conn(struct drbd_connection *connection) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 50776b36282..d2d1f97511b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -43,10 +43,10 @@ static int make_ov_request(struct drbd_device *, int); static int make_resync_request(struct drbd_device *, int); /* endio handlers: - * drbd_md_io_complete (defined here) + * drbd_md_endio (defined here) * drbd_request_endio (defined here) * drbd_peer_request_endio (defined here) - * bm_async_io_complete (defined in drbd_bitmap.c) + * drbd_bm_endio (defined in drbd_bitmap.c) * * For all these callbacks, note the following: * The callbacks will be called in irq context by the IDE drivers, @@ -65,7 +65,7 @@ rwlock_t global_state_lock; /* used for synchronous meta data and bitmap IO * submitted by drbd_md_sync_page_io() */ -void drbd_md_io_complete(struct bio *bio, int error) +void drbd_md_endio(struct bio *bio, int error) { struct drbd_device *device; @@ -1853,9 +1853,12 @@ static void drbd_ldev_destroy(struct drbd_device *device) device->resync = NULL; lc_destroy(device->act_log); device->act_log = NULL; - __no_warn(local, - drbd_free_ldev(device->ldev); - device->ldev = NULL;); + + __acquire(local); + drbd_free_ldev(device->ldev); + device->ldev = NULL; + __release(local); + clear_bit(GOING_DISKLESS, &device->flags); wake_up(&device->misc_wait); } @@ -1928,19 +1931,18 @@ void __update_timing_details( ++(*cb_nr); } -#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit)) static void do_device_work(struct drbd_device *device, const unsigned long todo) { - if (WORK_PENDING(MD_SYNC, todo)) + if (test_bit(MD_SYNC, &todo)) do_md_sync(device); - if (WORK_PENDING(RS_DONE, todo) || - WORK_PENDING(RS_PROGRESS, todo)) - update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo)); - if (WORK_PENDING(GO_DISKLESS, todo)) + if (test_bit(RS_DONE, &todo) || + test_bit(RS_PROGRESS, &todo)) + update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); + if (test_bit(GO_DISKLESS, &todo)) go_diskless(device); - if (WORK_PENDING(DESTROY_DISK, todo)) + if (test_bit(DESTROY_DISK, &todo)) drbd_ldev_destroy(device); - if (WORK_PENDING(RS_START, todo)) + if (test_bit(RS_START, &todo)) do_start_resync(device); } @@ -1992,22 +1994,13 @@ static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head * return !list_empty(work_list); } -static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list) -{ - spin_lock_irq(&queue->q_lock); - if (!list_empty(&queue->q)) - list_move(queue->q.next, work_list); - spin_unlock_irq(&queue->q_lock); - return !list_empty(work_list); -} - static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) { DEFINE_WAIT(wait); struct net_conf *nc; int uncork, cork; - dequeue_work_item(&connection->sender_work, work_list); + dequeue_work_batch(&connection->sender_work, work_list); if (!list_empty(work_list)) return; @@ -2033,8 +2026,6 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); spin_lock_irq(&connection->resource->req_lock); spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ - /* dequeue single item only, - * we still use drbd_queue_work_front() in some places */ if (!list_empty(&connection->sender_work.q)) list_splice_tail_init(&connection->sender_work.q, work_list); spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ @@ -2121,7 +2112,7 @@ int drbd_worker(struct drbd_thread *thi) if (get_t_state(thi) != RUNNING) break; - while (!list_empty(&work_list)) { + if (!list_empty(&work_list)) { w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); update_worker_timing_details(connection, w->cb); @@ -2137,13 +2128,13 @@ int drbd_worker(struct drbd_thread *thi) update_worker_timing_details(connection, do_unqueued_work); do_unqueued_work(connection); } - while (!list_empty(&work_list)) { + if (!list_empty(&work_list)) { w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); update_worker_timing_details(connection, w->cb); w->cb(w, 1); - } - dequeue_work_batch(&connection->sender_work, &work_list); + } else + dequeue_work_batch(&connection->sender_work, &work_list); } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags)); rcu_read_lock(); diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 8a290c08262..3abb121825b 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -694,16 +694,6 @@ static const struct block_device_operations hd_fops = { .getgeo = hd_getgeo, }; -/* - * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags - * means we run the IRQ-handler with interrupts disabled: this is bad for - * interrupt latency, but anything else has led to problems on some - * machines. - * - * We enable interrupts in some of the routines after making sure it's - * safe. - */ - static int __init hd_init(void) { int drive; @@ -761,7 +751,7 @@ static int __init hd_init(void) p->cyl, p->head, p->sect); } - if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) { + if (request_irq(HD_IRQ, hd_interrupt, 0, "hd", NULL)) { printk("hd: unable to get IRQ%d for the hard disk driver\n", HD_IRQ); goto out1; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 5c8e7fe0774..1bd5f523f8f 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -247,7 +247,7 @@ static void mtip_async_complete(struct mtip_port *port, if (unlikely(cmd->unaligned)) up(&port->cmd_slot_unal); - blk_mq_end_io(rq, status ? -EIO : 0); + blk_mq_end_request(rq, status ? -EIO : 0); } /* @@ -3739,7 +3739,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) int err; err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); - blk_mq_end_io(rq, err); + blk_mq_end_request(rq, err); return 0; } @@ -3775,13 +3775,16 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, return false; } -static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) +static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool last) { int ret; if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; + blk_mq_start_request(rq); + ret = mtip_submit_request(hctx, rq); if (likely(!ret)) return BLK_MQ_RQ_QUEUE_OK; @@ -3951,6 +3954,7 @@ skip_create_disk: /* Set device limits. */ set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); + clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags); blk_queue_max_segments(dd->queue, MTIP_MAX_SG); blk_queue_physical_block_size(dd->queue, 4096); blk_queue_max_hw_sectors(dd->queue, 0xffff); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index fb31b8ee437..4bc2a5cb993 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -847,6 +847,7 @@ static int __init nbd_init(void) * Tell the block layer that we are not a rotational device */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; disk->queue->limits.max_discard_sectors = UINT_MAX; disk->queue->limits.discard_zeroes_data = 0; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 00d469c7f9f..2671a3f02f0 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -177,7 +177,7 @@ static void end_cmd(struct nullb_cmd *cmd) { switch (queue_mode) { case NULL_Q_MQ: - blk_mq_end_io(cmd->rq, 0); + blk_mq_end_request(cmd->rq, 0); return; case NULL_Q_RQ: INIT_LIST_HEAD(&cmd->rq->queuelist); @@ -313,13 +313,16 @@ static void null_request_fn(struct request_queue *q) } } -static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) +static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool last) { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->rq = rq; cmd->nq = hctx->driver_data; + blk_mq_start_request(rq); + null_handle_cmd(cmd); return BLK_MQ_RQ_QUEUE_OK; } @@ -518,6 +521,7 @@ static int null_add_dev(void) nullb->q->queuedata = nullb; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); disk = nullb->disk = alloc_disk_node(1, home_node); if (!disk) { diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 02351e21716..e2bb8afbeae 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1916,6 +1916,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, ns->queue->queue_flags = QUEUE_FLAG_DEFAULT; queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, ns->queue); blk_queue_make_request(ns->queue, nvme_make_request); ns->dev = dev; ns->queue->queuedata = ns; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index fea7e76a00d..d48715b287e 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -722,6 +722,8 @@ static int pd_special_command(struct pd_unit *disk, int err = 0; rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); + if (IS_ERR(rq)) + return PTR_ERR(rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->special = func; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 758ac442c5b..09e628dafd9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,6 +704,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ, __GFP_WAIT); + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (cgc->buflen) { diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 3265ce94d28..d8b2488aaad 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -837,7 +837,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, "Failed to enable MSI\n"); } - st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + st = request_irq(dev->irq, rsxx_isr, IRQF_SHARED, DRIVER_NAME, card); if (st) { dev_err(CARD_TO_DEV(card), diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 2839d37e5af..40ee7705df6 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -307,6 +307,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue); if (rsxx_discard_supported(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); blk_queue_max_discard_sectors(card->queue, diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 8fcdcfb4b47..1e46eb2305c 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -4426,6 +4426,7 @@ static int skd_cons_disk(struct skd_device *skdev) q->limits.discard_zeroes_data = 1; queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); spin_lock_irqsave(&skdev->lock, flags); pr_debug("%s:%s:%d stopping %s queue\n", diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index d5e2d12b9d9..5d552857de4 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -568,7 +568,7 @@ static struct carm_request *carm_get_special(struct carm_host *host) return NULL; rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); - if (!rq) { + if (IS_ERR(rq)) { spin_lock_irqsave(&host->lock, flags); carm_put_request(host, crq); spin_unlock_irqrestore(&host->lock, flags); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0a581400de0..c6a27d54ad6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -41,12 +41,6 @@ struct virtio_blk /* Process context for config space updates */ struct work_struct config_work; - /* Lock for config space updates */ - struct mutex config_lock; - - /* enable config space updates */ - bool config_enable; - /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -135,7 +129,7 @@ static inline void virtblk_request_done(struct request *req) req->errors = (error != 0); } - blk_mq_end_io(req, error); + blk_mq_end_request(req, error); } static void virtblk_done(struct virtqueue *vq) @@ -164,14 +158,14 @@ static void virtblk_done(struct virtqueue *vq) spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } -static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) +static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, + bool last) { struct virtio_blk *vblk = hctx->queue->queuedata; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; unsigned int num; int qid = hctx->queue_num; - const bool last = (req->cmd_flags & REQ_END) != 0; int err; bool notify = false; @@ -205,6 +199,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) } } + blk_mq_start_request(req); + num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); if (num) { if (rq_data_dir(vbr->req) == WRITE) @@ -347,10 +343,6 @@ static void virtblk_config_changed_work(struct work_struct *work) char *envp[] = { "RESIZE=1", NULL }; u64 capacity, size; - mutex_lock(&vblk->config_lock); - if (!vblk->config_enable) - goto done; - /* Host must always specify the capacity. */ virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); @@ -374,8 +366,6 @@ static void virtblk_config_changed_work(struct work_struct *work) set_capacity(vblk->disk, capacity); revalidate_disk(vblk->disk); kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp); -done: - mutex_unlock(&vblk->config_lock); } static void virtblk_config_changed(struct virtio_device *vdev) @@ -606,10 +596,8 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; - mutex_init(&vblk->config_lock); INIT_WORK(&vblk->config_work, virtblk_config_changed_work); - vblk->config_enable = true; err = init_vq(vblk); if (err) @@ -733,6 +721,8 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err && opt_io_size) blk_queue_io_opt(q, blk_size * opt_io_size); + virtio_device_ready(vdev); + add_disk(vblk->disk); err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); if (err) @@ -771,10 +761,8 @@ static void virtblk_remove(struct virtio_device *vdev) int index = vblk->index; int refc; - /* Prevent config work handler from accessing the device. */ - mutex_lock(&vblk->config_lock); - vblk->config_enable = false; - mutex_unlock(&vblk->config_lock); + /* Make sure no work handler is accessing the device. */ + flush_work(&vblk->config_work); del_gendisk(vblk->disk); blk_cleanup_queue(vblk->disk->queue); @@ -784,8 +772,6 @@ static void virtblk_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - flush_work(&vblk->config_work); - refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); put_disk(vblk->disk); vdev->config->del_vqs(vdev); @@ -805,11 +791,7 @@ static int virtblk_freeze(struct virtio_device *vdev) /* Ensure we don't receive any more interrupts */ vdev->config->reset(vdev); - /* Prevent config work handler from accessing the device. */ - mutex_lock(&vblk->config_lock); - vblk->config_enable = false; - mutex_unlock(&vblk->config_lock); - + /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); blk_mq_stop_hw_queues(vblk->disk->queue); @@ -823,12 +805,14 @@ static int virtblk_restore(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; int ret; - vblk->config_enable = true; ret = init_vq(vdev->priv); - if (!ret) - blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + if (ret) + return ret; + + virtio_device_ready(vdev); - return ret; + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + return 0; } #endif diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 64c60edcdfb..63fc7f06a01 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -763,6 +763,7 @@ again: BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + put_free_pages(blkif, &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; ret |= 1; goto next; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 0b13b1c9a01..630a489e757 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -270,6 +270,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) blkif->blk_rings.common.sring = NULL; } + /* Remove all persistent grants and the cache of ballooned pages. */ + xen_blkbk_free_caches(blkif); + return 0; } @@ -281,9 +284,6 @@ static void xen_blkif_free(struct xen_blkif *blkif) xen_blkif_disconnect(blkif); xen_vbd_free(&blkif->vbd); - /* Remove all persistent grants and the cache of ballooned pages. */ - xen_blkbk_free_caches(blkif); - /* Make sure everything is drained before shutting down */ BUG_ON(blkif->persistent_gnt_c != 0); BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 37af03e9d85..5ac312f6e0b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -582,6 +582,14 @@ static inline void flush_requests(struct blkfront_info *info) notify_remote_via_irq(info->irq); } +static inline bool blkif_request_flush_valid(struct request *req, + struct blkfront_info *info) +{ + return ((req->cmd_type != REQ_TYPE_FS) || + ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && + !info->flush_op)); +} + /* * do_blkif_request * read a block; request is in a request queue @@ -604,9 +612,7 @@ static void do_blkif_request(struct request_queue *rq) blk_start_request(req); - if ((req->cmd_type != REQ_TYPE_FS) || - ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && - !info->flush_op)) { + if (blkif_request_flush_valid(req, info)) { __blk_end_request_all(req, -EIO); continue; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3b850164c65..0e63e8aa827 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1031,6 +1031,7 @@ static int create_device(struct zram *zram, int device_id) set_capacity(zram->disk, 0); /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 898b84bba28..5d28a45d296 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2180,8 +2180,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; rq = blk_get_request(q, READ, GFP_KERNEL); - if (!rq) { - ret = -ENOMEM; + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); break; } blk_rq_set_block_pc(rq); diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 132c9ccfdc6..72295ea2fd1 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -109,8 +109,8 @@ static int probe_common(struct virtio_device *vdev) vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL); if (index < 0) { - kfree(vi); - return index; + err = index; + goto err_ida; } sprintf(vi->name, "virtio_rng.%d", index); init_completion(&vi->have_data); @@ -128,13 +128,16 @@ static int probe_common(struct virtio_device *vdev) vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input"); if (IS_ERR(vi->vq)) { err = PTR_ERR(vi->vq); - vi->vq = NULL; - kfree(vi); - ida_simple_remove(&rng_index_ida, index); - return err; + goto err_find; } return 0; + +err_find: + ida_simple_remove(&rng_index_ida, index); +err_ida: + kfree(vi); + return err; } static void remove_common(struct virtio_device *vdev) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index b585b478982..bfa640023e6 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1449,6 +1449,8 @@ static int add_port(struct ports_device *portdev, u32 id) spin_lock_init(&port->outvq_lock); init_waitqueue_head(&port->waitqueue); + virtio_device_ready(portdev->vdev); + /* Fill the in_vq with buffers so the host can send us data. */ nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock); if (!nr_added_bufs) { @@ -2182,6 +2184,8 @@ static int virtcons_restore(struct virtio_device *vdev) if (ret) return ret; + virtio_device_ready(portdev->vdev); + if (use_multiport(portdev)) fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index dbd0f242ec1..76650e92db4 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -19,12 +19,12 @@ #define ATIIXP_IDE_UDMA_CONTROL 0x54 #define ATIIXP_IDE_UDMA_MODE 0x56 -typedef struct { +struct atiixp_ide_timing { u8 command_width; u8 recover_width; -} atiixp_ide_timing; +}; -static atiixp_ide_timing pio_timing[] = { +static struct atiixp_ide_timing pio_timing[] = { { 0x05, 0x0d }, { 0x04, 0x07 }, { 0x03, 0x04 }, @@ -32,7 +32,7 @@ static atiixp_ide_timing pio_timing[] = { { 0x02, 0x00 }, }; -static atiixp_ide_timing mdma_timing[] = { +static struct atiixp_ide_timing mdma_timing[] = { { 0x07, 0x07 }, { 0x02, 0x01 }, { 0x02, 0x00 }, diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index ee880382e3b..56b9708894a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -685,8 +685,10 @@ static void ide_disk_setup(ide_drive_t *drive) printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, queue_max_sectors(q) / 2); - if (ata_id_is_ssd(id)) + if (ata_id_is_ssd(id)) { queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + } /* calculate drive capacity, and select LBA if possible */ ide_disk_get_capacity(drive); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index f41558a0bcd..ca958604cda 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -46,7 +46,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) * timeout has expired, so power management will be reenabled. */ rq = blk_get_request(q, READ, GFP_NOWAIT); - if (unlikely(!rq)) + if (IS_ERR(rq)) goto out; rq->cmd[0] = REQ_UNPARK_HEADS; diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index fd6d28f3fc3..1cc6ca8bfbd 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -506,7 +506,10 @@ static void send_message(capidrv_contr *card, _cmsg *cmsg) struct sk_buff *skb; size_t len; - capi_cmsg2message(cmsg, cmsg->buf); + if (capi_cmsg2message(cmsg, cmsg->buf)) { + printk(KERN_ERR "capidrv::send_message: parser failure\n"); + return; + } len = CAPIMSG_LEN(cmsg->buf); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { @@ -1578,7 +1581,12 @@ static _cmsg s_cmsg; static void capidrv_recv_message(struct capi20_appl *ap, struct sk_buff *skb) { - capi_message2cmsg(&s_cmsg, skb->data); + if (capi_message2cmsg(&s_cmsg, skb->data)) { + printk(KERN_ERR "capidrv: applid=%d: received invalid message\n", + ap->applid); + kfree_skb(skb); + return; + } if (debugmode > 3) { _cdebbuf *cdb = capi_cmsg2str(&s_cmsg); @@ -1903,7 +1911,11 @@ static int capidrv_command(isdn_ctrl *c, capidrv_contr *card) NULL, /* Useruserdata */ NULL /* Facilitydataarray */ ); - capi_cmsg2message(&cmdcmsg, cmdcmsg.buf); + if (capi_cmsg2message(&cmdcmsg, cmdcmsg.buf)) { + printk(KERN_ERR "capidrv-%d: capidrv_command: parser failure\n", + card->contrnr); + return -EINVAL; + } plci_change_state(card, bchan->plcip, EV_PLCI_CONNECT_RESP); send_message(card, &cmdcmsg); return 0; @@ -2090,7 +2102,11 @@ static int if_sendbuf(int id, int channel, int doack, struct sk_buff *skb) if (capidrv_add_ack(nccip, datahandle, doack ? (int)skb->len : -1) < 0) return 0; - capi_cmsg2message(&sendcmsg, sendcmsg.buf); + if (capi_cmsg2message(&sendcmsg, sendcmsg.buf)) { + printk(KERN_ERR "capidrv-%d: if_sendbuf: parser failure\n", + card->contrnr); + return -EINVAL; + } msglen = CAPIMSG_LEN(sendcmsg.buf); if (skb_headroom(skb) < msglen) { struct sk_buff *nskb = skb_realloc_headroom(skb, msglen); diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c index 4073d1684d0..36c1b37cea0 100644 --- a/drivers/isdn/capi/capiutil.c +++ b/drivers/isdn/capi/capiutil.c @@ -207,9 +207,24 @@ static unsigned command_2_index(unsigned c, unsigned sc) c = 0x9 + (c & 0x0f); else if (c == 0x41) c = 0x9 + 0x1; + if (c > 0x18) + c = 0x00; return (sc & 3) * (0x9 + 0x9) + c; } +/** + * capi_cmd2par() - find parameter string for CAPI 2.0 command/subcommand + * @cmd: command number + * @subcmd: subcommand number + * + * Return value: static string, NULL if command/subcommand unknown + */ + +static unsigned char *capi_cmd2par(u8 cmd, u8 subcmd) +{ + return cpars[command_2_index(cmd, subcmd)]; +} + /*-------------------------------------------------------*/ #define TYP (cdef[cmsg->par[cmsg->p]].typ) #define OFF (((u8 *)cmsg) + cdef[cmsg->par[cmsg->p]].off) @@ -302,7 +317,9 @@ unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg) cmsg->m = msg; cmsg->l = 8; cmsg->p = 0; - cmsg->par = cpars[command_2_index(cmsg->Command, cmsg->Subcommand)]; + cmsg->par = capi_cmd2par(cmsg->Command, cmsg->Subcommand); + if (!cmsg->par) + return 1; /* invalid command/subcommand */ pars_2_message(cmsg); @@ -375,7 +392,9 @@ unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg) cmsg->p = 0; byteTRcpy(cmsg->m + 4, &cmsg->Command); byteTRcpy(cmsg->m + 5, &cmsg->Subcommand); - cmsg->par = cpars[command_2_index(cmsg->Command, cmsg->Subcommand)]; + cmsg->par = capi_cmd2par(cmsg->Command, cmsg->Subcommand); + if (!cmsg->par) + return 1; /* invalid command/subcommand */ message_2_pars(cmsg); @@ -470,12 +489,17 @@ static char *mnames[] = * @cmd: command number * @subcmd: subcommand number * - * Return value: static string, NULL if command/subcommand unknown + * Return value: static string */ char *capi_cmd2str(u8 cmd, u8 subcmd) { - return mnames[command_2_index(cmd, subcmd)]; + char *result; + + result = mnames[command_2_index(cmd, subcmd)]; + if (result == NULL) + result = "INVALID_COMMAND"; + return result; } @@ -625,6 +649,9 @@ static _cdebbuf *printstruct(_cdebbuf *cdb, u8 *m) static _cdebbuf *protocol_message_2_pars(_cdebbuf *cdb, _cmsg *cmsg, int level) { + if (!cmsg->par) + return NULL; /* invalid command/subcommand */ + for (; TYP != _CEND; cmsg->p++) { int slen = 29 + 3 - level; int i; @@ -759,10 +786,10 @@ _cdebbuf *capi_message2str(u8 *msg) cmsg->p = 0; byteTRcpy(cmsg->m + 4, &cmsg->Command); byteTRcpy(cmsg->m + 5, &cmsg->Subcommand); - cmsg->par = cpars[command_2_index(cmsg->Command, cmsg->Subcommand)]; + cmsg->par = capi_cmd2par(cmsg->Command, cmsg->Subcommand); cdb = bufprint(cdb, "%-26s ID=%03d #0x%04x LEN=%04d\n", - mnames[command_2_index(cmsg->Command, cmsg->Subcommand)], + capi_cmd2str(cmsg->Command, cmsg->Subcommand), ((unsigned short *) msg)[1], ((unsigned short *) msg)[3], ((unsigned short *) msg)[0]); @@ -796,7 +823,7 @@ _cdebbuf *capi_cmsg2str(_cmsg *cmsg) cmsg->l = 8; cmsg->p = 0; cdb = bufprint(cdb, "%s ID=%03d #0x%04x LEN=%04d\n", - mnames[command_2_index(cmsg->Command, cmsg->Subcommand)], + capi_cmd2str(cmsg->Command, cmsg->Subcommand), ((u16 *) cmsg->m)[1], ((u16 *) cmsg->m)[3], ((u16 *) cmsg->m)[0]); diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index c123709acf8..823f6985b26 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1184,7 +1184,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) * Return value: CAPI result code */ -int capi20_manufacturer(unsigned int cmd, void __user *data) +int capi20_manufacturer(unsigned long cmd, void __user *data) { struct capi_ctr *ctr; int retval; @@ -1259,7 +1259,7 @@ int capi20_manufacturer(unsigned int cmd, void __user *data) } default: - printk(KERN_ERR "kcapi: manufacturer command %d unknown.\n", + printk(KERN_ERR "kcapi: manufacturer command %lu unknown.\n", cmd); break; diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c index 3286903a95d..ccec7778cad 100644 --- a/drivers/isdn/gigaset/capi.c +++ b/drivers/isdn/gigaset/capi.c @@ -250,6 +250,8 @@ static inline void dump_rawmsg(enum debuglevel level, const char *tag, l -= 12; if (l <= 0) return; + if (l > 64) + l = 64; /* arbitrary limit */ dbgline = kmalloc(3 * l, GFP_ATOMIC); if (!dbgline) return; @@ -645,7 +647,13 @@ int gigaset_isdn_icall(struct at_state_t *at_state) __func__); break; } - capi_cmsg2message(&iif->hcmsg, __skb_put(skb, msgsize)); + if (capi_cmsg2message(&iif->hcmsg, + __skb_put(skb, msgsize))) { + dev_err(cs->dev, "%s: message parser failure\n", + __func__); + dev_kfree_skb_any(skb); + break; + } dump_cmsg(DEBUG_CMD, __func__, &iif->hcmsg); /* add to listeners on this B channel, update state */ @@ -691,7 +699,12 @@ static void send_disconnect_ind(struct bc_state *bcs, dev_err(cs->dev, "%s: out of memory\n", __func__); return; } - capi_cmsg2message(&iif->hcmsg, __skb_put(skb, CAPI_DISCONNECT_IND_LEN)); + if (capi_cmsg2message(&iif->hcmsg, + __skb_put(skb, CAPI_DISCONNECT_IND_LEN))) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->hcmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); } @@ -721,8 +734,12 @@ static void send_disconnect_b3_ind(struct bc_state *bcs, dev_err(cs->dev, "%s: out of memory\n", __func__); return; } - capi_cmsg2message(&iif->hcmsg, - __skb_put(skb, CAPI_DISCONNECT_B3_IND_BASELEN)); + if (capi_cmsg2message(&iif->hcmsg, + __skb_put(skb, CAPI_DISCONNECT_B3_IND_BASELEN))) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->hcmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); } @@ -787,7 +804,11 @@ void gigaset_isdn_connD(struct bc_state *bcs) dev_err(cs->dev, "%s: out of memory\n", __func__); return; } - capi_cmsg2message(&iif->hcmsg, __skb_put(skb, msgsize)); + if (capi_cmsg2message(&iif->hcmsg, __skb_put(skb, msgsize))) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->hcmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); } @@ -887,7 +908,11 @@ void gigaset_isdn_connB(struct bc_state *bcs) dev_err(cs->dev, "%s: out of memory\n", __func__); return; } - capi_cmsg2message(&iif->hcmsg, __skb_put(skb, msgsize)); + if (capi_cmsg2message(&iif->hcmsg, __skb_put(skb, msgsize))) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->hcmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); } @@ -1094,13 +1119,19 @@ static void send_conf(struct gigaset_capi_ctr *iif, struct sk_buff *skb, u16 info) { + struct cardstate *cs = iif->ctr.driverdata; + /* * _CONF replies always only have NCCI and Info parameters * so they'll fit into the _REQ message skb */ capi_cmsg_answer(&iif->acmsg); iif->acmsg.Info = info; - capi_cmsg2message(&iif->acmsg, skb->data); + if (capi_cmsg2message(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } __skb_trim(skb, CAPI_STDCONF_LEN); dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); @@ -1122,7 +1153,11 @@ static void do_facility_req(struct gigaset_capi_ctr *iif, static u8 confparam[10]; /* max. 9 octets + length byte */ /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* @@ -1180,6 +1215,7 @@ static void do_facility_req(struct gigaset_capi_ctr *iif, confparam[3] = 2; /* length */ capimsg_setu16(confparam, 4, CapiSupplementaryServiceNotSupported); + break; } info = CapiSuccess; confparam[3] = 2; /* length */ @@ -1220,6 +1256,7 @@ static void do_facility_req(struct gigaset_capi_ctr *iif, } /* send FACILITY_CONF with given Info and confirmation parameter */ + dev_kfree_skb_any(skb); capi_cmsg_answer(cmsg); cmsg->Info = info; cmsg->FacilityConfirmationParameter = confparam; @@ -1229,7 +1266,11 @@ static void do_facility_req(struct gigaset_capi_ctr *iif, dev_err(cs->dev, "%s: out of memory\n", __func__); return; } - capi_cmsg2message(cmsg, __skb_put(cskb, msgsize)); + if (capi_cmsg2message(cmsg, __skb_put(cskb, msgsize))) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(cskb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); capi_ctr_handle_message(&iif->ctr, ap->id, cskb); } @@ -1243,8 +1284,14 @@ static void do_listen_req(struct gigaset_capi_ctr *iif, struct gigaset_capi_appl *ap, struct sk_buff *skb) { + struct cardstate *cs = iif->ctr.driverdata; + /* decode message */ - capi_message2cmsg(&iif->acmsg, skb->data); + if (capi_message2cmsg(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); /* store listening parameters */ @@ -1261,8 +1308,14 @@ static void do_alert_req(struct gigaset_capi_ctr *iif, struct gigaset_capi_appl *ap, struct sk_buff *skb) { + struct cardstate *cs = iif->ctr.driverdata; + /* decode message */ - capi_message2cmsg(&iif->acmsg, skb->data); + if (capi_message2cmsg(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); send_conf(iif, ap, skb, CapiAlertAlreadySent); } @@ -1287,7 +1340,11 @@ static void do_connect_req(struct gigaset_capi_ctr *iif, u16 info; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* get free B channel & construct PLCI */ @@ -1574,7 +1631,11 @@ static void do_connect_resp(struct gigaset_capi_ctr *iif, int channel; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); dev_kfree_skb_any(skb); @@ -1740,7 +1801,11 @@ static void do_connect_b3_req(struct gigaset_capi_ctr *iif, int channel; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* extract and check channel number from PLCI */ @@ -1785,7 +1850,11 @@ static void do_connect_b3_resp(struct gigaset_capi_ctr *iif, u8 command; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* extract and check channel number and NCCI */ @@ -1825,7 +1894,11 @@ static void do_connect_b3_resp(struct gigaset_capi_ctr *iif, capi_cmsg_header(cmsg, ap->id, command, CAPI_IND, ap->nextMessageNumber++, cmsg->adr.adrNCCI); __skb_trim(skb, msgsize); - capi_cmsg2message(cmsg, skb->data); + if (capi_cmsg2message(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); capi_ctr_handle_message(&iif->ctr, ap->id, skb); } @@ -1847,7 +1920,11 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif, int channel; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* extract and check channel number from PLCI */ @@ -1903,8 +1980,14 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif, kfree(b3cmsg); return; } - capi_cmsg2message(b3cmsg, - __skb_put(b3skb, CAPI_DISCONNECT_B3_IND_BASELEN)); + if (capi_cmsg2message(b3cmsg, + __skb_put(b3skb, CAPI_DISCONNECT_B3_IND_BASELEN))) { + dev_err(cs->dev, "%s: message parser failure\n", + __func__); + kfree(b3cmsg); + dev_kfree_skb_any(b3skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, b3cmsg); kfree(b3cmsg); capi_ctr_handle_message(&iif->ctr, ap->id, b3skb); @@ -1935,7 +2018,11 @@ static void do_disconnect_b3_req(struct gigaset_capi_ctr *iif, int channel; /* decode message */ - capi_message2cmsg(cmsg, skb->data); + if (capi_message2cmsg(cmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, cmsg); /* extract and check channel number and NCCI */ @@ -2052,8 +2139,14 @@ static void do_reset_b3_req(struct gigaset_capi_ctr *iif, struct gigaset_capi_appl *ap, struct sk_buff *skb) { + struct cardstate *cs = iif->ctr.driverdata; + /* decode message */ - capi_message2cmsg(&iif->acmsg, skb->data); + if (capi_message2cmsg(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); send_conf(iif, ap, skb, CapiResetProcedureNotSupportedByCurrentProtocol); @@ -2066,8 +2159,14 @@ static void do_unsupported(struct gigaset_capi_ctr *iif, struct gigaset_capi_appl *ap, struct sk_buff *skb) { + struct cardstate *cs = iif->ctr.driverdata; + /* decode message */ - capi_message2cmsg(&iif->acmsg, skb->data); + if (capi_message2cmsg(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); send_conf(iif, ap, skb, CapiMessageNotSupportedInCurrentState); } @@ -2079,8 +2178,14 @@ static void do_nothing(struct gigaset_capi_ctr *iif, struct gigaset_capi_appl *ap, struct sk_buff *skb) { + struct cardstate *cs = iif->ctr.driverdata; + /* decode message */ - capi_message2cmsg(&iif->acmsg, skb->data); + if (capi_message2cmsg(&iif->acmsg, skb->data)) { + dev_err(cs->dev, "%s: message parser failure\n", __func__); + dev_kfree_skb_any(skb); + return; + } dump_cmsg(DEBUG_CMD, __func__, &iif->acmsg); dev_kfree_skb_any(skb); } @@ -2357,7 +2462,7 @@ int gigaset_isdn_regdev(struct cardstate *cs, const char *isdnid) struct gigaset_capi_ctr *iif; int rc; - iif = kmalloc(sizeof(*iif), GFP_KERNEL); + iif = kzalloc(sizeof(*iif), GFP_KERNEL); if (!iif) { pr_err("%s: out of memory\n", __func__); return -ENOMEM; @@ -2366,7 +2471,7 @@ int gigaset_isdn_regdev(struct cardstate *cs, const char *isdnid) /* prepare controller structure */ iif->ctr.owner = THIS_MODULE; iif->ctr.driverdata = cs; - strncpy(iif->ctr.name, isdnid, sizeof(iif->ctr.name)); + strncpy(iif->ctr.name, isdnid, sizeof(iif->ctr.name) - 1); iif->ctr.driver_name = "gigaset"; iif->ctr.load_firmware = NULL; iif->ctr.reset_ctr = NULL; diff --git a/drivers/isdn/gigaset/ev-layer.c b/drivers/isdn/gigaset/ev-layer.c index dcae14aef37..c8ced12fa45 100644 --- a/drivers/isdn/gigaset/ev-layer.c +++ b/drivers/isdn/gigaset/ev-layer.c @@ -604,14 +604,14 @@ void gigaset_handle_modem_response(struct cardstate *cs) } EXPORT_SYMBOL_GPL(gigaset_handle_modem_response); -/* disconnect +/* disconnect_nobc * process closing of connection associated with given AT state structure + * without B channel */ -static void disconnect(struct at_state_t **at_state_p) +static void disconnect_nobc(struct at_state_t **at_state_p, + struct cardstate *cs) { unsigned long flags; - struct bc_state *bcs = (*at_state_p)->bcs; - struct cardstate *cs = (*at_state_p)->cs; spin_lock_irqsave(&cs->lock, flags); ++(*at_state_p)->seq_index; @@ -622,23 +622,44 @@ static void disconnect(struct at_state_t **at_state_p) gig_dbg(DEBUG_EVENT, "Scheduling PC_UMMODE"); cs->commands_pending = 1; } - spin_unlock_irqrestore(&cs->lock, flags); - if (bcs) { - /* B channel assigned: invoke hardware specific handler */ - cs->ops->close_bchannel(bcs); - /* notify LL */ - if (bcs->chstate & (CHS_D_UP | CHS_NOTIFY_LL)) { - bcs->chstate &= ~(CHS_D_UP | CHS_NOTIFY_LL); - gigaset_isdn_hupD(bcs); - } - } else { - /* no B channel assigned: just deallocate */ - spin_lock_irqsave(&cs->lock, flags); + /* check for and deallocate temporary AT state */ + if (!list_empty(&(*at_state_p)->list)) { list_del(&(*at_state_p)->list); kfree(*at_state_p); *at_state_p = NULL; - spin_unlock_irqrestore(&cs->lock, flags); + } + + spin_unlock_irqrestore(&cs->lock, flags); +} + +/* disconnect_bc + * process closing of connection associated with given AT state structure + * and B channel + */ +static void disconnect_bc(struct at_state_t *at_state, + struct cardstate *cs, struct bc_state *bcs) +{ + unsigned long flags; + + spin_lock_irqsave(&cs->lock, flags); + ++at_state->seq_index; + + /* revert to selected idle mode */ + if (!cs->cidmode) { + cs->at_state.pending_commands |= PC_UMMODE; + gig_dbg(DEBUG_EVENT, "Scheduling PC_UMMODE"); + cs->commands_pending = 1; + } + spin_unlock_irqrestore(&cs->lock, flags); + + /* invoke hardware specific handler */ + cs->ops->close_bchannel(bcs); + + /* notify LL */ + if (bcs->chstate & (CHS_D_UP | CHS_NOTIFY_LL)) { + bcs->chstate &= ~(CHS_D_UP | CHS_NOTIFY_LL); + gigaset_isdn_hupD(bcs); } } @@ -646,7 +667,7 @@ static void disconnect(struct at_state_t **at_state_p) * get a free AT state structure: either one of those associated with the * B channels of the Gigaset device, or if none of those is available, * a newly allocated one with bcs=NULL - * The structure should be freed by calling disconnect() after use. + * The structure should be freed by calling disconnect_nobc() after use. */ static inline struct at_state_t *get_free_channel(struct cardstate *cs, int cid) @@ -1057,7 +1078,7 @@ static void do_action(int action, struct cardstate *cs, struct event_t *ev) { struct at_state_t *at_state = *p_at_state; - struct at_state_t *at_state2; + struct bc_state *bcs2; unsigned long flags; int channel; @@ -1156,8 +1177,8 @@ static void do_action(int action, struct cardstate *cs, break; case ACT_RING: /* get fresh AT state structure for new CID */ - at_state2 = get_free_channel(cs, ev->parameter); - if (!at_state2) { + at_state = get_free_channel(cs, ev->parameter); + if (!at_state) { dev_warn(cs->dev, "RING ignored: could not allocate channel structure\n"); break; @@ -1166,16 +1187,16 @@ static void do_action(int action, struct cardstate *cs, /* initialize AT state structure * note that bcs may be NULL if no B channel is free */ - at_state2->ConState = 700; + at_state->ConState = 700; for (i = 0; i < STR_NUM; ++i) { - kfree(at_state2->str_var[i]); - at_state2->str_var[i] = NULL; + kfree(at_state->str_var[i]); + at_state->str_var[i] = NULL; } - at_state2->int_var[VAR_ZCTP] = -1; + at_state->int_var[VAR_ZCTP] = -1; spin_lock_irqsave(&cs->lock, flags); - at_state2->timer_expires = RING_TIMEOUT; - at_state2->timer_active = 1; + at_state->timer_expires = RING_TIMEOUT; + at_state->timer_active = 1; spin_unlock_irqrestore(&cs->lock, flags); break; case ACT_ICALL: @@ -1213,14 +1234,17 @@ static void do_action(int action, struct cardstate *cs, case ACT_DISCONNECT: cs->cur_at_seq = SEQ_NONE; at_state->cid = -1; - if (bcs && cs->onechannel && cs->dle) { + if (!bcs) { + disconnect_nobc(p_at_state, cs); + } else if (cs->onechannel && cs->dle) { /* Check for other open channels not needed: * DLE only used for M10x with one B channel. */ at_state->pending_commands |= PC_DLE0; cs->commands_pending = 1; - } else - disconnect(p_at_state); + } else { + disconnect_bc(at_state, cs, bcs); + } break; case ACT_FAKEDLE0: at_state->int_var[VAR_ZDLE] = 0; @@ -1228,25 +1252,27 @@ static void do_action(int action, struct cardstate *cs, /* fall through */ case ACT_DLE0: cs->cur_at_seq = SEQ_NONE; - at_state2 = &cs->bcs[cs->curchannel].at_state; - disconnect(&at_state2); + bcs2 = cs->bcs + cs->curchannel; + disconnect_bc(&bcs2->at_state, cs, bcs2); break; case ACT_ABORTHUP: cs->cur_at_seq = SEQ_NONE; dev_warn(cs->dev, "Could not hang up.\n"); at_state->cid = -1; - if (bcs && cs->onechannel) + if (!bcs) + disconnect_nobc(p_at_state, cs); + else if (cs->onechannel) at_state->pending_commands |= PC_DLE0; else - disconnect(p_at_state); + disconnect_bc(at_state, cs, bcs); schedule_init(cs, MS_RECOVER); break; case ACT_FAILDLE0: cs->cur_at_seq = SEQ_NONE; dev_warn(cs->dev, "Error leaving DLE mode.\n"); cs->dle = 0; - at_state2 = &cs->bcs[cs->curchannel].at_state; - disconnect(&at_state2); + bcs2 = cs->bcs + cs->curchannel; + disconnect_bc(&bcs2->at_state, cs, bcs2); schedule_init(cs, MS_RECOVER); break; case ACT_FAILDLE1: @@ -1275,14 +1301,14 @@ static void do_action(int action, struct cardstate *cs, if (reinit_and_retry(cs, channel) < 0) { dev_warn(cs->dev, "Could not get a call ID. Cannot dial.\n"); - at_state2 = &cs->bcs[channel].at_state; - disconnect(&at_state2); + bcs2 = cs->bcs + channel; + disconnect_bc(&bcs2->at_state, cs, bcs2); } break; case ACT_ABORTCID: cs->cur_at_seq = SEQ_NONE; - at_state2 = &cs->bcs[cs->curchannel].at_state; - disconnect(&at_state2); + bcs2 = cs->bcs + cs->curchannel; + disconnect_bc(&bcs2->at_state, cs, bcs2); break; case ACT_DIALING: @@ -1291,7 +1317,10 @@ static void do_action(int action, struct cardstate *cs, break; case ACT_ABORTACCEPT: /* hangup/error/timeout during ICALL procssng */ - disconnect(p_at_state); + if (bcs) + disconnect_bc(at_state, cs, bcs); + else + disconnect_nobc(p_at_state, cs); break; case ACT_ABORTDIAL: /* error/timeout during dial preparation */ @@ -1380,6 +1409,11 @@ static void do_action(int action, struct cardstate *cs, /* events from the LL */ case ACT_DIAL: + if (!ev->ptr) { + *p_genresp = 1; + *p_resp_code = RSP_ERROR; + break; + } start_dial(at_state, ev->ptr, ev->parameter); break; case ACT_ACCEPT: diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 82e91ba1acd..a8e652dac54 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -497,6 +497,7 @@ static int send_cb(struct cardstate *cs, struct cmdbuf_t *cb) static int gigaset_write_cmd(struct cardstate *cs, struct cmdbuf_t *cb) { unsigned long flags; + int len; gigaset_dbg_buffer(cs->mstate != MS_LOCKED ? DEBUG_TRANSCMD : DEBUG_LOCKCMD, @@ -515,10 +516,11 @@ static int gigaset_write_cmd(struct cardstate *cs, struct cmdbuf_t *cb) spin_unlock_irqrestore(&cs->cmdlock, flags); spin_lock_irqsave(&cs->lock, flags); + len = cb->len; if (cs->connected) tasklet_schedule(&cs->write_tasklet); spin_unlock_irqrestore(&cs->lock, flags); - return cb->len; + return len; } static int gigaset_write_room(struct cardstate *cs) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d4713d098a3..4dd2bb7167f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -842,6 +842,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, q->limits.logical_block_size = block_size; q->limits.physical_block_size = block_size; set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); + clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); blk_queue_flush(q, REQ_FLUSH|REQ_FUA); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 67f8b31e205..da3604e73e8 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -879,7 +879,6 @@ void bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; - int wait = 0; if (!bitmap || !bitmap->storage.filemap || test_bit(BITMAP_STALE, &bitmap->flags)) @@ -897,16 +896,13 @@ void bitmap_unplug(struct bitmap *bitmap) clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); write_page(bitmap, bitmap->storage.filemap[i], 0); } - if (dirty) - wait = 1; - } - if (wait) { /* if any writes were performed, we need to wait on them */ - if (bitmap->storage.file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - else - md_super_wait(bitmap->mddev); } + if (bitmap->storage.file) + wait_event(bitmap->write_wait, + atomic_read(&bitmap->pending_writes)==0); + else + md_super_wait(bitmap->mddev); + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) bitmap_file_kick(bitmap); } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 0505559f096..825ca1f8763 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -465,6 +465,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty) c->n_buffers[dirty]++; b->list_mode = dirty; list_move(&b->lru_list, &c->lru[dirty]); + b->last_accessed = jiffies; } /*---------------------------------------------------------------- @@ -1471,9 +1472,9 @@ static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { freed += __cleanup_old_buffer(b, gfp_mask, 0); if (!--nr_to_scan) - break; + return freed; + dm_bufio_cond_resched(); } - dm_bufio_cond_resched(); } return freed; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 51521429fb5..0be9381365d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1418,7 +1418,7 @@ static void retrieve_deps(struct dm_table *table, deps->count = count; count = 0; list_for_each_entry (dd, dm_table_get_devices(table), list) - deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev); + deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev); param->data_size = param->data_start + needed; } diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index b428c0ae63d..39ad9664d39 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -272,7 +272,7 @@ int dm_ulog_tfr_init(void) r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback); if (r) { - cn_del_callback(&ulog_cn_id); + kfree(prealloced_cn_msg); return r; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 833d7e752f0..7b6b0f0f831 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -317,8 +317,10 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes) struct priority_group *pg; unsigned bypassed = 1; - if (!m->nr_valid_paths) + if (!m->nr_valid_paths) { + m->queue_io = 0; goto failed; + } /* Were we instructed to switch PG? */ if (m->next_pg) { diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4880b69e2e9..4857fa4a548 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2014 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -18,6 +18,8 @@ #define DM_MSG_PREFIX "raid" +static bool devices_handle_discard_safely = false; + /* * The following flags are used by dm-raid.c to set up the array state. * They must be cleared before md_run is called. @@ -475,6 +477,8 @@ too_many: * will form the "stripe" * [[no]sync] Force or prevent recovery of the * entire array + * [devices_handle_discard_safely] Allow discards on RAID4/5/6; useful if RAID + * member device(s) properly support TRIM/UNMAP * [rebuild <idx>] Rebuild the drive indicated by the index * [daemon_sleep <ms>] Time between bitmap daemon work to * clear bits @@ -1150,6 +1154,49 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) } /* + * Enable/disable discard support on RAID set depending on + * RAID level and discard properties of underlying RAID members. + */ +static void configure_discard_support(struct dm_target *ti, struct raid_set *rs) +{ + int i; + bool raid456; + + /* Assume discards not supported until after checks below. */ + ti->discards_supported = false; + + /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */ + raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6); + + for (i = 0; i < rs->md.raid_disks; i++) { + struct request_queue *q = bdev_get_queue(rs->dev[i].rdev.bdev); + + if (!q || !blk_queue_discard(q)) + return; + + if (raid456) { + if (!q->limits.discard_zeroes_data) + return; + if (!devices_handle_discard_safely) { + DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty."); + DMERR("Set dm-raid.devices_handle_discard_safely=Y to override."); + return; + } + } + } + + /* All RAID members properly support discards */ + ti->discards_supported = true; + + /* + * RAID1 and RAID10 personalities require bio splitting, + * RAID0/4/5/6 don't and process large discard bios properly. + */ + ti->split_discard_bios = !!(rs->md.level == 1 || rs->md.level == 10); + ti->num_discard_bios = 1; +} + +/* * Construct a RAID4/5/6 mapping: * Args: * <raid_type> <#raid_params> <raid_params> \ @@ -1231,6 +1278,11 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->private = rs; ti->num_flush_bios = 1; + /* + * Disable/enable discard support on RAID set. + */ + configure_discard_support(ti, rs); + mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); rs->md.in_sync = 0; /* Assume already marked dirty */ @@ -1652,7 +1704,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 5, 2}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, @@ -1683,6 +1735,10 @@ static void __exit dm_raid_exit(void) module_init(dm_raid_init); module_exit(dm_raid_exit); +module_param(devices_handle_discard_safely, bool, 0644); +MODULE_PARM_DESC(devices_handle_discard_safely, + "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions"); + MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); MODULE_ALIAS("dm-raid1"); MODULE_ALIAS("dm-raid10"); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9c6cb8dbcf..b2bd1ebf456 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -210,15 +210,16 @@ int dm_table_create(struct dm_table **result, fmode_t mode, return 0; } -static void free_devices(struct list_head *devices) +static void free_devices(struct list_head *devices, struct mapped_device *md) { struct list_head *tmp, *next; list_for_each_safe(tmp, next, devices) { struct dm_dev_internal *dd = list_entry(tmp, struct dm_dev_internal, list); - DMWARN("dm_table_destroy: dm_put_device call missing for %s", - dd->dm_dev.name); + DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s", + dm_device_name(md), dd->dm_dev->name); + dm_put_table_device(md, dd->dm_dev); kfree(dd); } } @@ -247,7 +248,7 @@ void dm_table_destroy(struct dm_table *t) vfree(t->highs); /* free the device list */ - free_devices(&t->devices); + free_devices(&t->devices, t->md); dm_free_md_mempools(t->mempools); @@ -262,53 +263,13 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) struct dm_dev_internal *dd; list_for_each_entry (dd, l, list) - if (dd->dm_dev.bdev->bd_dev == dev) + if (dd->dm_dev->bdev->bd_dev == dev) return dd; return NULL; } /* - * Open a device so we can use it as a map destination. - */ -static int open_dev(struct dm_dev_internal *d, dev_t dev, - struct mapped_device *md) -{ - static char *_claim_ptr = "I belong to device-mapper"; - struct block_device *bdev; - - int r; - - BUG_ON(d->dm_dev.bdev); - - bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - - r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) { - blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); - return r; - } - - d->dm_dev.bdev = bdev; - return 0; -} - -/* - * Close a device that we've been using. - */ -static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) -{ - if (!d->dm_dev.bdev) - return; - - bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); - blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); - d->dm_dev.bdev = NULL; -} - -/* * If possible, this checks an area of a destination device is invalid. */ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, @@ -386,19 +347,17 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, struct mapped_device *md) { int r; - struct dm_dev_internal dd_new, dd_old; + struct dm_dev *old_dev, *new_dev; - dd_new = dd_old = *dd; + old_dev = dd->dm_dev; - dd_new.dm_dev.mode |= new_mode; - dd_new.dm_dev.bdev = NULL; - - r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); + r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, + dd->dm_dev->mode | new_mode, &new_dev); if (r) return r; - dd->dm_dev.mode |= new_mode; - close_dev(&dd_old, md); + dd->dm_dev = new_dev; + dm_put_table_device(md, old_dev); return 0; } @@ -440,27 +399,22 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, if (!dd) return -ENOMEM; - dd->dm_dev.mode = mode; - dd->dm_dev.bdev = NULL; - - if ((r = open_dev(dd, dev, t->md))) { + if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) { kfree(dd); return r; } - format_dev_t(dd->dm_dev.name, dev); - atomic_set(&dd->count, 0); list_add(&dd->list, &t->devices); - } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { + } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { r = upgrade_mode(dd, mode, t->md); if (r) return r; } atomic_inc(&dd->count); - *result = &dd->dm_dev; + *result = dd->dm_dev; return 0; } EXPORT_SYMBOL(dm_get_device); @@ -505,11 +459,23 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, */ void dm_put_device(struct dm_target *ti, struct dm_dev *d) { - struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, - dm_dev); + int found = 0; + struct list_head *devices = &ti->table->devices; + struct dm_dev_internal *dd; + list_for_each_entry(dd, devices, list) { + if (dd->dm_dev == d) { + found = 1; + break; + } + } + if (!found) { + DMWARN("%s: device %s not in table devices list", + dm_device_name(ti->table->md), d->name); + return; + } if (atomic_dec_and_test(&dd->count)) { - close_dev(dd, ti->table->md); + dm_put_table_device(ti->table->md, d); list_del(&dd->list); kfree(dd); } @@ -906,7 +872,7 @@ static int dm_table_set_type(struct dm_table *t) /* Non-request-stackable devices can't be used for request-based dm */ devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { - if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { + if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) { DMWARN("table load rejected: including" " non-request-stackable devices"); return -EINVAL; @@ -1043,7 +1009,7 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, struct gendisk *prev_disk = NULL, *template_disk = NULL; list_for_each_entry(dd, devices, list) { - template_disk = dd->dm_dev.bdev->bd_disk; + template_disk = dd->dm_dev->bdev->bd_disk; if (!blk_get_integrity(template_disk)) goto no_integrity; if (!match_all && !blk_integrity_is_initialized(template_disk)) @@ -1629,7 +1595,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) int r = 0; list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); + struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); char b[BDEVNAME_SIZE]; if (likely(q)) @@ -1637,7 +1603,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) else DMWARN_LIMIT("%s: any_congested: nonexistent device %s", dm_device_name(t->md), - bdevname(dd->dm_dev.bdev, b)); + bdevname(dd->dm_dev->bdev, b)); } list_for_each_entry(cb, &t->target_callbacks, list) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 32b958dbc49..58f3927fd7c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -142,6 +142,9 @@ struct mapped_device { */ struct dm_table *map; + struct list_head table_devices; + struct mutex table_devices_lock; + unsigned long flags; struct request_queue *queue; @@ -212,6 +215,12 @@ struct dm_md_mempools { struct bio_set *bs; }; +struct table_device { + struct list_head list; + atomic_t count; + struct dm_dev dm_dev; +}; + #define RESERVED_BIO_BASED_IOS 16 #define RESERVED_REQUEST_BASED_IOS 256 #define RESERVED_MAX_IOS 1024 @@ -670,6 +679,120 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) } /* + * Open a table device so we can use it as a map destination. + */ +static int open_table_device(struct table_device *td, dev_t dev, + struct mapped_device *md) +{ + static char *_claim_ptr = "I belong to device-mapper"; + struct block_device *bdev; + + int r; + + BUG_ON(td->dm_dev.bdev); + + bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + r = bd_link_disk_holder(bdev, dm_disk(md)); + if (r) { + blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL); + return r; + } + + td->dm_dev.bdev = bdev; + return 0; +} + +/* + * Close a table device that we've been using. + */ +static void close_table_device(struct table_device *td, struct mapped_device *md) +{ + if (!td->dm_dev.bdev) + return; + + bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); + blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); + td->dm_dev.bdev = NULL; +} + +static struct table_device *find_table_device(struct list_head *l, dev_t dev, + fmode_t mode) { + struct table_device *td; + + list_for_each_entry(td, l, list) + if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode) + return td; + + return NULL; +} + +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, + struct dm_dev **result) { + int r; + struct table_device *td; + + mutex_lock(&md->table_devices_lock); + td = find_table_device(&md->table_devices, dev, mode); + if (!td) { + td = kmalloc(sizeof(*td), GFP_KERNEL); + if (!td) { + mutex_unlock(&md->table_devices_lock); + return -ENOMEM; + } + + td->dm_dev.mode = mode; + td->dm_dev.bdev = NULL; + + if ((r = open_table_device(td, dev, md))) { + mutex_unlock(&md->table_devices_lock); + kfree(td); + return r; + } + + format_dev_t(td->dm_dev.name, dev); + + atomic_set(&td->count, 0); + list_add(&td->list, &md->table_devices); + } + atomic_inc(&td->count); + mutex_unlock(&md->table_devices_lock); + + *result = &td->dm_dev; + return 0; +} +EXPORT_SYMBOL_GPL(dm_get_table_device); + +void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) +{ + struct table_device *td = container_of(d, struct table_device, dm_dev); + + mutex_lock(&md->table_devices_lock); + if (atomic_dec_and_test(&td->count)) { + close_table_device(td, md); + list_del(&td->list); + kfree(td); + } + mutex_unlock(&md->table_devices_lock); +} +EXPORT_SYMBOL(dm_put_table_device); + +static void free_table_devices(struct list_head *devices) +{ + struct list_head *tmp, *next; + + list_for_each_safe(tmp, next, devices) { + struct table_device *td = list_entry(tmp, struct table_device, list); + + DMWARN("dm_destroy: %s still exists with %d references", + td->dm_dev.name, atomic_read(&td->count)); + kfree(td); + } +} + +/* * Get the geometry associated with a dm device */ int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) @@ -1249,13 +1372,13 @@ static void clone_bio(struct dm_target_io *tio, struct bio *bio, } static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti, int nr_iovecs, + struct dm_target *ti, unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, ci->md->bs); + clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs); tio = container_of(clone, struct dm_target_io, clone); tio->io = ci->io; @@ -1269,17 +1392,12 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target *ti, unsigned target_bio_nr, unsigned *len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); + struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr); struct bio *clone = &tio->clone; tio->len_ptr = len; - /* - * Discard requests require the bio's inline iovecs be initialized. - * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush - * and discard, so no need for concern about wasted bvec allocations. - */ - __bio_clone_fast(clone, ci->bio); + __bio_clone_fast(clone, ci->bio); if (len) bio_setup_sector(clone, ci->sector, *len); @@ -1322,7 +1440,7 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti num_target_bios = ti->num_write_bios(ti, bio); for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { - tio = alloc_tio(ci, ti, 0, target_bio_nr); + tio = alloc_tio(ci, ti, target_bio_nr); tio->len_ptr = len; clone_bio(tio, bio, sector, *len); __map_bio(tio); @@ -1949,12 +2067,14 @@ static struct mapped_device *alloc_dev(int minor) md->type = DM_TYPE_NONE; mutex_init(&md->suspend_lock); mutex_init(&md->type_lock); + mutex_init(&md->table_devices_lock); spin_lock_init(&md->deferred_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); atomic_set(&md->uevent_seq, 0); INIT_LIST_HEAD(&md->uevent_list); + INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); md->queue = blk_alloc_queue(GFP_KERNEL); @@ -2040,6 +2160,7 @@ static void free_dev(struct mapped_device *md) blk_integrity_unregister(md->disk); del_gendisk(md->disk); cleanup_srcu_struct(&md->io_barrier); + free_table_devices(&md->table_devices); free_minor(minor); spin_lock(&_minor_lock); @@ -2900,7 +3021,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u if (!pools->io_pool) goto out; - pools->bs = bioset_create(pool_size, front_pad); + pools->bs = bioset_create_nobvec(pool_size, front_pad); if (!pools->bs) goto out; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index e81d2152fa6..988c7fb7b14 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -44,7 +44,7 @@ struct dm_dev_internal { struct list_head list; atomic_t count; - struct dm_dev dm_dev; + struct dm_dev *dm_dev; }; struct dm_table; @@ -188,6 +188,9 @@ int dm_cancel_deferred_remove(struct mapped_device *md); int dm_request_based(struct mapped_device *md); sector_t dm_get_size(struct mapped_device *md); struct request_queue *dm_get_md_queue(struct mapped_device *md); +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, + struct dm_dev **result); +void dm_put_table_device(struct mapped_device *md, struct dm_dev *d); struct dm_stats *dm_get_stats(struct mapped_device *md); int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 56f534b4a2d..64713b77df1 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -10,10 +10,10 @@ it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/blkdev.h> @@ -25,7 +25,7 @@ #include "linear.h" /* - * find which device holds a particular offset + * find which device holds a particular offset */ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) { @@ -355,7 +355,6 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } - static struct md_personality linear_personality = { .name = "linear", @@ -379,7 +378,6 @@ static void linear_exit (void) unregister_md_personality (&linear_personality); } - module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/md/md.c b/drivers/md/md.c index 1294238610d..4dfa15da9cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1,6 +1,6 @@ /* md.c : Multiple Devices driver for Linux - Copyright (C) 1998, 1999, 2000 Ingo Molnar + Copyright (C) 1998, 1999, 2000 Ingo Molnar completely rewritten, based on the MD driver code from Marc Zyngier @@ -66,8 +66,6 @@ static void autostart_arrays(int part); static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); -static void md_print_devices(void); - static DECLARE_WAIT_QUEUE_HEAD(resync_wait); static struct workqueue_struct *md_wq; static struct workqueue_struct *md_misc_wq; @@ -75,8 +73,6 @@ static struct workqueue_struct *md_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); -#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } - /* * Default number of read corrections we'll attempt on an rdev * before ejecting it from the array. We divide the read error @@ -218,7 +214,6 @@ static void md_new_event_inintr(struct mddev *mddev) static LIST_HEAD(all_mddevs); static DEFINE_SPINLOCK(all_mddevs_lock); - /* * iterates through all used mddevs in the system. * We take care to grab the all_mddevs_lock whenever navigating @@ -228,7 +223,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); */ #define for_each_mddev(_mddev,_tmp) \ \ - for (({ spin_lock(&all_mddevs_lock); \ + for (({ spin_lock(&all_mddevs_lock); \ _tmp = all_mddevs.next; \ _mddev = NULL;}); \ ({ if (_tmp != &all_mddevs) \ @@ -241,7 +236,6 @@ static DEFINE_SPINLOCK(all_mddevs_lock); _tmp = _tmp->next;}) \ ) - /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is * being suspended pending a reconfiguration. @@ -488,7 +482,7 @@ void mddev_init(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_init); -static struct mddev * mddev_find(dev_t unit) +static struct mddev *mddev_find(dev_t unit) { struct mddev *mddev, *new = NULL; @@ -530,7 +524,7 @@ static struct mddev * mddev_find(dev_t unit) kfree(new); return NULL; } - + is_free = 1; list_for_each_entry(mddev, &all_mddevs, all_mddevs) if (mddev->unit == dev) { @@ -562,7 +556,7 @@ static struct mddev * mddev_find(dev_t unit) goto retry; } -static inline int __must_check mddev_lock(struct mddev * mddev) +static inline int __must_check mddev_lock(struct mddev *mddev) { return mutex_lock_interruptible(&mddev->reconfig_mutex); } @@ -570,7 +564,7 @@ static inline int __must_check mddev_lock(struct mddev * mddev) /* Sometimes we need to take the lock in a situation where * failure due to interrupts is not acceptable. */ -static inline void mddev_lock_nointr(struct mddev * mddev) +static inline void mddev_lock_nointr(struct mddev *mddev) { mutex_lock(&mddev->reconfig_mutex); } @@ -580,14 +574,14 @@ static inline int mddev_is_locked(struct mddev *mddev) return mutex_is_locked(&mddev->reconfig_mutex); } -static inline int mddev_trylock(struct mddev * mddev) +static inline int mddev_trylock(struct mddev *mddev) { return mutex_trylock(&mddev->reconfig_mutex); } static struct attribute_group md_redundancy_group; -static void mddev_unlock(struct mddev * mddev) +static void mddev_unlock(struct mddev *mddev) { if (mddev->to_remove) { /* These cannot be removed under reconfig_mutex as @@ -630,17 +624,6 @@ static void mddev_unlock(struct mddev * mddev) spin_unlock(&pers_lock); } -static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr) -{ - struct md_rdev *rdev; - - rdev_for_each(rdev, mddev) - if (rdev->desc_nr == nr) - return rdev; - - return NULL; -} - static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr) { struct md_rdev *rdev; @@ -693,11 +676,8 @@ static inline sector_t calc_dev_sboffset(struct md_rdev *rdev) return MD_NEW_SIZE_SECTORS(num_sectors); } -static int alloc_disk_sb(struct md_rdev * rdev) +static int alloc_disk_sb(struct md_rdev *rdev) { - if (rdev->sb_page) - MD_BUG(); - rdev->sb_page = alloc_page(GFP_KERNEL); if (!rdev->sb_page) { printk(KERN_ALERT "md: out of memory.\n"); @@ -766,14 +746,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, void md_super_wait(struct mddev *mddev) { /* wait for all superblock writes that were scheduled to complete */ - DEFINE_WAIT(wq); - for(;;) { - prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); - if (atomic_read(&mddev->pending_writes)==0) - break; - schedule(); - } - finish_wait(&mddev->sb_wait, &wq); + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); } int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, @@ -801,17 +774,13 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, } EXPORT_SYMBOL_GPL(sync_page_io); -static int read_disk_sb(struct md_rdev * rdev, int size) +static int read_disk_sb(struct md_rdev *rdev, int size) { char b[BDEVNAME_SIZE]; - if (!rdev->sb_page) { - MD_BUG(); - return -EINVAL; - } + if (rdev->sb_loaded) return 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true)) goto fail; rdev->sb_loaded = 1; @@ -825,7 +794,7 @@ fail: static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) { - return sb1->set_uuid0 == sb2->set_uuid0 && + return sb1->set_uuid0 == sb2->set_uuid0 && sb1->set_uuid1 == sb2->set_uuid1 && sb1->set_uuid2 == sb2->set_uuid2 && sb1->set_uuid3 == sb2->set_uuid3; @@ -861,14 +830,13 @@ abort: return ret; } - static u32 md_csum_fold(u32 csum) { csum = (csum & 0xffff) + (csum >> 16); return (csum & 0xffff) + (csum >> 16); } -static unsigned int calc_sb_csum(mdp_super_t * sb) +static unsigned int calc_sb_csum(mdp_super_t *sb) { u64 newcsum = 0; u32 *sb32 = (u32*)sb; @@ -882,7 +850,6 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) newcsum += sb32[i]; csum = (newcsum & 0xffffffff) + (newcsum>>32); - #ifdef CONFIG_ALPHA /* This used to use csum_partial, which was wrong for several * reasons including that different results are returned on @@ -899,7 +866,6 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) return csum; } - /* * Handle superblock details. * We want to be able to handle multiple superblock formats @@ -965,7 +931,7 @@ int md_check_no_bitmap(struct mddev *mddev) EXPORT_SYMBOL(md_check_no_bitmap); /* - * load_super for 0.90.0 + * load_super for 0.90.0 */ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version) { @@ -1044,7 +1010,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor ev2 = md_event(refsb); if (ev1 > ev2) ret = 1; - else + else ret = 0; } rdev->sectors = rdev->sb_start; @@ -1118,7 +1084,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) if (sb->state & (1<<MD_SB_CLEAN)) mddev->recovery_cp = MaxSector; else { - if (sb->events_hi == sb->cp_events_hi && + if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { mddev->recovery_cp = sb->recovery_cp; } else @@ -1146,7 +1112,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) ++ev1; if (sb->disks[rdev->desc_nr].state & ( (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))) - if (ev1 < mddev->events) + if (ev1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* if adding to array with a bitmap, then we can accept an @@ -1197,7 +1163,6 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) struct md_rdev *rdev2; int next_spare = mddev->raid_disks; - /* make rdev->sb match mddev data.. * * 1/ zero out disks @@ -1366,7 +1331,7 @@ super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) * version 1 superblock */ -static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) +static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb) { __le32 disk_csum; u32 csum; @@ -1430,7 +1395,6 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ ret = read_disk_sb(rdev, 4096); if (ret) return ret; - sb = page_address(rdev->sb_page); if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || @@ -1817,7 +1781,7 @@ retry: for (i=0; i<max_dev;i++) sb->dev_roles[i] = cpu_to_le16(0xfffe); - + rdev_for_each(rdev2, mddev) { i = rdev2->desc_nr; if (test_bit(Faulty, &rdev2->flags)) @@ -2033,18 +1997,13 @@ void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) } EXPORT_SYMBOL(md_integrity_add_rdev); -static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev) +static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) { char b[BDEVNAME_SIZE]; struct kobject *ko; char *s; int err; - if (rdev->mddev) { - MD_BUG(); - return -EINVAL; - } - /* prevent duplicates */ if (find_rdev(mddev, rdev->bdev->bd_dev)) return -EEXIST; @@ -2067,16 +2026,21 @@ static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev) * If it is -1, assign a free number, else * check number is not in use */ + rcu_read_lock(); if (rdev->desc_nr < 0) { int choice = 0; - if (mddev->pers) choice = mddev->raid_disks; - while (find_rdev_nr(mddev, choice)) + if (mddev->pers) + choice = mddev->raid_disks; + while (find_rdev_nr_rcu(mddev, choice)) choice++; rdev->desc_nr = choice; } else { - if (find_rdev_nr(mddev, rdev->desc_nr)) + if (find_rdev_nr_rcu(mddev, rdev->desc_nr)) { + rcu_read_unlock(); return -EBUSY; + } } + rcu_read_unlock(); if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) { printk(KERN_WARNING "md: %s: array is limited to %d devices\n", mdname(mddev), mddev->max_disks); @@ -2118,13 +2082,10 @@ static void md_delayed_delete(struct work_struct *ws) kobject_put(&rdev->kobj); } -static void unbind_rdev_from_array(struct md_rdev * rdev) +static void unbind_rdev_from_array(struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; - if (!rdev->mddev) { - MD_BUG(); - return; - } + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); @@ -2169,20 +2130,17 @@ static void unlock_rdev(struct md_rdev *rdev) { struct block_device *bdev = rdev->bdev; rdev->bdev = NULL; - if (!bdev) - MD_BUG(); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } void md_autodetect_dev(dev_t dev); -static void export_rdev(struct md_rdev * rdev) +static void export_rdev(struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; + printk(KERN_INFO "md: export_rdev(%s)\n", bdevname(rdev->bdev,b)); - if (rdev->mddev) - MD_BUG(); md_rdev_clear(rdev); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) @@ -2192,7 +2150,7 @@ static void export_rdev(struct md_rdev * rdev) kobject_put(&rdev->kobj); } -static void kick_rdev_from_array(struct md_rdev * rdev) +static void kick_rdev_from_array(struct md_rdev *rdev) { unbind_rdev_from_array(rdev); export_rdev(rdev); @@ -2200,153 +2158,18 @@ static void kick_rdev_from_array(struct md_rdev * rdev) static void export_array(struct mddev *mddev) { - struct md_rdev *rdev, *tmp; + struct md_rdev *rdev; - rdev_for_each_safe(rdev, tmp, mddev) { - if (!rdev->mddev) { - MD_BUG(); - continue; - } + while (!list_empty(&mddev->disks)) { + rdev = list_first_entry(&mddev->disks, struct md_rdev, + same_set); kick_rdev_from_array(rdev); } - if (!list_empty(&mddev->disks)) - MD_BUG(); mddev->raid_disks = 0; mddev->major_version = 0; } -static void print_desc(mdp_disk_t *desc) -{ - printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number, - desc->major,desc->minor,desc->raid_disk,desc->state); -} - -static void print_sb_90(mdp_super_t *sb) -{ - int i; - - printk(KERN_INFO - "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", - sb->major_version, sb->minor_version, sb->patch_version, - sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, - sb->ctime); - printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", - sb->level, sb->size, sb->nr_disks, sb->raid_disks, - sb->md_minor, sb->layout, sb->chunk_size); - printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d" - " FD:%d SD:%d CSUM:%08x E:%08lx\n", - sb->utime, sb->state, sb->active_disks, sb->working_disks, - sb->failed_disks, sb->spare_disks, - sb->sb_csum, (unsigned long)sb->events_lo); - - printk(KERN_INFO); - for (i = 0; i < MD_SB_DISKS; i++) { - mdp_disk_t *desc; - - desc = sb->disks + i; - if (desc->number || desc->major || desc->minor || - desc->raid_disk || (desc->state && (desc->state != 4))) { - printk(" D %2d: ", i); - print_desc(desc); - } - } - printk(KERN_INFO "md: THIS: "); - print_desc(&sb->this_disk); -} - -static void print_sb_1(struct mdp_superblock_1 *sb) -{ - __u8 *uuid; - - uuid = sb->set_uuid; - printk(KERN_INFO - "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n" - "md: Name: \"%s\" CT:%llu\n", - le32_to_cpu(sb->major_version), - le32_to_cpu(sb->feature_map), - uuid, - sb->set_name, - (unsigned long long)le64_to_cpu(sb->ctime) - & MD_SUPERBLOCK_1_TIME_SEC_MASK); - - uuid = sb->device_uuid; - printk(KERN_INFO - "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" - " RO:%llu\n" - "md: Dev:%08x UUID: %pU\n" - "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" - "md: (MaxDev:%u) \n", - le32_to_cpu(sb->level), - (unsigned long long)le64_to_cpu(sb->size), - le32_to_cpu(sb->raid_disks), - le32_to_cpu(sb->layout), - le32_to_cpu(sb->chunksize), - (unsigned long long)le64_to_cpu(sb->data_offset), - (unsigned long long)le64_to_cpu(sb->data_size), - (unsigned long long)le64_to_cpu(sb->super_offset), - (unsigned long long)le64_to_cpu(sb->recovery_offset), - le32_to_cpu(sb->dev_number), - uuid, - sb->devflags, - (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, - (unsigned long long)le64_to_cpu(sb->events), - (unsigned long long)le64_to_cpu(sb->resync_offset), - le32_to_cpu(sb->sb_csum), - le32_to_cpu(sb->max_dev) - ); -} - -static void print_rdev(struct md_rdev *rdev, int major_version) -{ - char b[BDEVNAME_SIZE]; - printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n", - bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors, - test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), - rdev->desc_nr); - if (rdev->sb_loaded) { - printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); - switch (major_version) { - case 0: - print_sb_90(page_address(rdev->sb_page)); - break; - case 1: - print_sb_1(page_address(rdev->sb_page)); - break; - } - } else - printk(KERN_INFO "md: no rdev superblock!\n"); -} - -static void md_print_devices(void) -{ - struct list_head *tmp; - struct md_rdev *rdev; - struct mddev *mddev; - char b[BDEVNAME_SIZE]; - - printk("\n"); - printk("md: **********************************\n"); - printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); - printk("md: **********************************\n"); - for_each_mddev(mddev, tmp) { - - if (mddev->bitmap) - bitmap_print_sb(mddev->bitmap); - else - printk("%s: ", mdname(mddev)); - rdev_for_each(rdev, mddev) - printk("<%s>", bdevname(rdev->bdev,b)); - printk("\n"); - - rdev_for_each(rdev, mddev) - print_rdev(rdev, mddev->major_version); - } - printk("md: **********************************\n"); - printk("\n"); -} - - -static void sync_sbs(struct mddev * mddev, int nospares) +static void sync_sbs(struct mddev *mddev, int nospares) { /* Update each superblock (in-memory image), but * if we are allowed to, skip spares which already @@ -2369,7 +2192,7 @@ static void sync_sbs(struct mddev * mddev, int nospares) } } -static void md_update_sb(struct mddev * mddev, int force_change) +static void md_update_sb(struct mddev *mddev, int force_change) { struct md_rdev *rdev; int sync_req; @@ -2390,7 +2213,7 @@ repeat: mddev->curr_resync_completed > rdev->recovery_offset) rdev->recovery_offset = mddev->curr_resync_completed; - } + } if (!mddev->persistent) { clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -2453,15 +2276,12 @@ repeat: mddev->can_decrease_events = nospares; } - if (!mddev->events) { - /* - * oops, this 64-bit counter should never wrap. - * Either we are in around ~1 trillion A.C., assuming - * 1 reboot per second, or we have a bug: - */ - MD_BUG(); - mddev->events --; - } + /* + * This 64-bit counter should never wrap. + * Either we are in around ~1 trillion A.C., assuming + * 1 reboot per second, or we have a bug... + */ + WARN_ON(mddev->events == 0); rdev_for_each(rdev, mddev) { if (rdev->badblocks.changed) @@ -2668,10 +2488,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) set_bit(In_sync, &rdev->flags); err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) { - clear_bit(In_sync, &rdev->flags); - rdev->saved_raid_disk = rdev->raid_disk; - rdev->raid_disk = -1; - err = 0; + if (rdev->mddev->pers == NULL) { + clear_bit(In_sync, &rdev->flags); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->raid_disk = -1; + err = 0; + } } else if (cmd_match(buf, "write_error")) { set_bit(WriteErrorSeen, &rdev->flags); err = 0; @@ -2829,7 +2651,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) return len; } - static struct rdev_sysfs_entry rdev_slot = __ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store); @@ -2980,20 +2801,20 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) rdev->sectors = sectors; if (sectors > oldsectors && my_mddev->external) { - /* need to check that all other rdevs with the same ->bdev - * do not overlap. We need to unlock the mddev to avoid - * a deadlock. We have already changed rdev->sectors, and if - * we have to change it back, we will have the lock again. + /* Need to check that all other rdevs with the same + * ->bdev do not overlap. 'rcu' is sufficient to walk + * the rdev lists safely. + * This check does not provide a hard guarantee, it + * just helps avoid dangerous mistakes. */ struct mddev *mddev; int overlap = 0; struct list_head *tmp; - mddev_unlock(my_mddev); + rcu_read_lock(); for_each_mddev(mddev, tmp) { struct md_rdev *rdev2; - mddev_lock_nointr(mddev); rdev_for_each(rdev2, mddev) if (rdev->bdev == rdev2->bdev && rdev != rdev2 && @@ -3003,13 +2824,12 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) overlap = 1; break; } - mddev_unlock(mddev); if (overlap) { mddev_put(mddev); break; } } - mddev_lock_nointr(my_mddev); + rcu_read_unlock(); if (overlap) { /* Someone else could have slipped in a size * change here, but doing so is just silly. @@ -3027,7 +2847,6 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) static struct rdev_sysfs_entry rdev_size = __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); - static ssize_t recovery_start_show(struct md_rdev *rdev, char *page) { unsigned long long recovery_start = rdev->recovery_offset; @@ -3063,7 +2882,6 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_ static struct rdev_sysfs_entry rdev_recovery_start = __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store); - static ssize_t badblocks_show(struct badblocks *bb, char *page, int unack); static ssize_t @@ -3084,7 +2902,6 @@ static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len) static struct rdev_sysfs_entry rdev_bad_blocks = __ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store); - static ssize_t ubb_show(struct md_rdev *rdev, char *page) { return badblocks_show(&rdev->badblocks, page, 1); @@ -3241,7 +3058,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS; if (!size) { - printk(KERN_WARNING + printk(KERN_WARNING "md: %s has zero or unknown size, marking faulty!\n", bdevname(rdev->bdev,b)); err = -EINVAL; @@ -3260,7 +3077,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe goto abort_free; } if (err < 0) { - printk(KERN_WARNING + printk(KERN_WARNING "md: could not read %s's sb, not importing!\n", bdevname(rdev->bdev,b)); goto abort_free; @@ -3281,8 +3098,7 @@ abort_free: * Check a full RAID array for plausibility */ - -static void analyze_sbs(struct mddev * mddev) +static void analyze_sbs(struct mddev *mddev) { int i; struct md_rdev *rdev, *freshest, *tmp; @@ -3300,12 +3116,11 @@ static void analyze_sbs(struct mddev * mddev) default: printk( KERN_ERR \ "md: fatal superblock inconsistency in %s" - " -- removing from array\n", + " -- removing from array\n", bdevname(rdev->bdev,b)); kick_rdev_from_array(rdev); } - super_types[mddev->major_version]. validate_super(mddev, freshest); @@ -3344,7 +3159,7 @@ static void analyze_sbs(struct mddev * mddev) /* Read a fixed-point number. * Numbers in sysfs attributes should be in "standard" units where * possible, so time should be in seconds. - * However we internally use a a much smaller unit such as + * However we internally use a a much smaller unit such as * milliseconds or jiffies. * This function takes a decimal number with a possible fractional * component, and produces an integer which is the result of @@ -3381,7 +3196,6 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) return 0; } - static void md_safemode_timeout(unsigned long data); static ssize_t @@ -3524,7 +3338,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) /* Looks like we have a winner */ mddev_suspend(mddev); mddev->pers->stop(mddev); - + if (mddev->pers->sync_request == NULL && pers->sync_request != NULL) { /* need to add the md_redundancy_group */ @@ -3533,7 +3347,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) "md: cannot register extra attributes for %s\n", mdname(mddev)); mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); - } + } if (mddev->pers->sync_request != NULL && pers->sync_request == NULL) { /* need to remove the md_redundancy_group */ @@ -3611,7 +3425,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_level = __ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store); - static ssize_t layout_show(struct mddev *mddev, char *page) { @@ -3654,7 +3467,6 @@ layout_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_layout = __ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); - static ssize_t raid_disks_show(struct mddev *mddev, char *page) { @@ -3859,9 +3671,9 @@ array_state_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", array_states[st]); } -static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev); -static int md_set_readonly(struct mddev * mddev, struct block_device *bdev); -static int do_md_run(struct mddev * mddev); +static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev); +static int md_set_readonly(struct mddev *mddev, struct block_device *bdev); +static int do_md_run(struct mddev *mddev); static int restart_array(struct mddev *mddev); static ssize_t @@ -4012,7 +3824,6 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) minor != MINOR(dev)) return -EOVERFLOW; - if (mddev->persistent) { rdev = md_import_device(dev, mddev->major_version, mddev->minor_version); @@ -4108,7 +3919,6 @@ size_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_size = __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); - /* Metadata version. * This is one of * 'none' for arrays with no metadata (good luck...) @@ -4490,7 +4300,7 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) unsigned long long new = simple_strtoull(buf, &e, 10); unsigned long long old = mddev->suspend_lo; - if (mddev->pers == NULL || + if (mddev->pers == NULL || mddev->pers->quiesce == NULL) return -EINVAL; if (buf == e || (*e && *e != '\n')) @@ -4510,7 +4320,6 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_suspend_lo = __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); - static ssize_t suspend_hi_show(struct mddev *mddev, char *page) { @@ -4698,7 +4507,6 @@ static struct attribute_group md_redundancy_group = { .attrs = md_redundancy_attrs, }; - static ssize_t md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { @@ -5111,7 +4919,7 @@ int md_run(struct mddev *mddev) } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; - atomic_set(&mddev->writes_pending,0); + atomic_set(&mddev->writes_pending,0); atomic_set(&mddev->max_corr_read_errors, MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); mddev->safemode = 0; @@ -5125,9 +4933,9 @@ int md_run(struct mddev *mddev) if (rdev->raid_disk >= 0) if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; - + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - + if (mddev->flags & MD_UPDATE_SB_FLAGS) md_update_sb(mddev, 0); @@ -5307,7 +5115,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > !!bdev || + if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sync_thread || (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { printk("md: %s still in use.\n",mdname(mddev)); @@ -5339,7 +5147,7 @@ out: * 0 - completely stop and dis-assemble array * 2 - stop but do not disassemble array */ -static int do_md_stop(struct mddev * mddev, int mode, +static int do_md_stop(struct mddev *mddev, int mode, struct block_device *bdev) { struct gendisk *disk = mddev->gendisk; @@ -5362,7 +5170,7 @@ static int do_md_stop(struct mddev * mddev, int mode, mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > !!bdev || + if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || mddev->sync_thread || (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { @@ -5512,12 +5320,12 @@ static void autorun_devices(int part) "md: cannot allocate memory for md drive.\n"); break; } - if (mddev_lock(mddev)) + if (mddev_lock(mddev)) printk(KERN_WARNING "md: %s locked, cannot run\n", mdname(mddev)); else if (mddev->raid_disks || mddev->major_version || !list_empty(&mddev->disks)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: %s already running, cannot run %s\n", mdname(mddev), bdevname(rdev0->bdev,b)); mddev_unlock(mddev); @@ -5545,7 +5353,7 @@ static void autorun_devices(int part) } #endif /* !MODULE */ -static int get_version(void __user * arg) +static int get_version(void __user *arg) { mdu_version_t ver; @@ -5559,7 +5367,7 @@ static int get_version(void __user * arg) return 0; } -static int get_array_info(struct mddev * mddev, void __user * arg) +static int get_array_info(struct mddev *mddev, void __user *arg) { mdu_array_info_t info; int nr,working,insync,failed,spare; @@ -5574,7 +5382,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg) else { working++; if (test_bit(In_sync, &rdev->flags)) - insync++; + insync++; else spare++; } @@ -5614,7 +5422,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg) return 0; } -static int get_bitmap_file(struct mddev * mddev, void __user * arg) +static int get_bitmap_file(struct mddev *mddev, void __user * arg) { mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ char *ptr, *buf = NULL; @@ -5652,7 +5460,7 @@ out: return err; } -static int get_disk_info(struct mddev * mddev, void __user * arg) +static int get_disk_info(struct mddev *mddev, void __user * arg) { mdu_disk_info_t info; struct md_rdev *rdev; @@ -5688,7 +5496,7 @@ static int get_disk_info(struct mddev * mddev, void __user * arg) return 0; } -static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) +static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) { char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; struct md_rdev *rdev; @@ -5702,7 +5510,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) /* expecting a device which has a superblock */ rdev = md_import_device(dev, mddev->major_version, mddev->minor_version); if (IS_ERR(rdev)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: md_import_device returned %ld\n", PTR_ERR(rdev)); return PTR_ERR(rdev); @@ -5714,9 +5522,9 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) err = super_types[mddev->major_version] .load_super(rdev, rdev0, mddev->minor_version); if (err < 0) { - printk(KERN_WARNING + printk(KERN_WARNING "md: %s has different UUID to %s\n", - bdevname(rdev->bdev,b), + bdevname(rdev->bdev,b), bdevname(rdev0->bdev,b2)); export_rdev(rdev); return -EINVAL; @@ -5736,7 +5544,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) if (mddev->pers) { int err; if (!mddev->pers->hot_add_disk) { - printk(KERN_WARNING + printk(KERN_WARNING "%s: personality does not support diskops!\n", mdname(mddev)); return -EINVAL; @@ -5747,7 +5555,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) else rdev = md_import_device(dev, -1, -1); if (IS_ERR(rdev)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: md_import_device returned %ld\n", PTR_ERR(rdev)); return PTR_ERR(rdev); @@ -5821,7 +5629,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) int err; rdev = md_import_device(dev, -1, 0); if (IS_ERR(rdev)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", PTR_ERR(rdev)); return PTR_ERR(rdev); @@ -5856,7 +5664,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) return 0; } -static int hot_remove_disk(struct mddev * mddev, dev_t dev) +static int hot_remove_disk(struct mddev *mddev, dev_t dev) { char b[BDEVNAME_SIZE]; struct md_rdev *rdev; @@ -5882,7 +5690,7 @@ busy: return -EBUSY; } -static int hot_add_disk(struct mddev * mddev, dev_t dev) +static int hot_add_disk(struct mddev *mddev, dev_t dev) { char b[BDEVNAME_SIZE]; int err; @@ -5898,7 +5706,7 @@ static int hot_add_disk(struct mddev * mddev, dev_t dev) return -EINVAL; } if (!mddev->pers->hot_add_disk) { - printk(KERN_WARNING + printk(KERN_WARNING "%s: personality does not support diskops!\n", mdname(mddev)); return -EINVAL; @@ -5906,7 +5714,7 @@ static int hot_add_disk(struct mddev * mddev, dev_t dev) rdev = md_import_device(dev, -1, 0); if (IS_ERR(rdev)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", PTR_ERR(rdev)); return -EINVAL; @@ -5920,7 +5728,7 @@ static int hot_add_disk(struct mddev * mddev, dev_t dev) rdev->sectors = rdev->sb_start; if (test_bit(Faulty, &rdev->flags)) { - printk(KERN_WARNING + printk(KERN_WARNING "md: can not hot-add faulty %s disk to %s!\n", bdevname(rdev->bdev,b), mdname(mddev)); err = -EINVAL; @@ -5968,7 +5776,6 @@ static int set_bitmap_file(struct mddev *mddev, int fd) /* we should be able to change the bitmap.. */ } - if (fd >= 0) { struct inode *inode; if (mddev->bitmap) @@ -6039,7 +5846,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) * The minor and patch _version numbers are also kept incase the * super_block handler wishes to interpret them. */ -static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) +static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) { if (info->raid_disks == 0) { @@ -6048,7 +5855,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) info->major_version >= ARRAY_SIZE(super_types) || super_types[info->major_version].name == NULL) { /* maybe try to auto-load a module? */ - printk(KERN_INFO + printk(KERN_INFO "md: superblock version %d not known\n", info->major_version); return -EINVAL; @@ -6196,7 +6003,6 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) return rv; } - /* * update_array_info is used to change the configuration of an * on-line array. @@ -6347,7 +6153,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) case GET_DISK_INFO: case HOT_ADD_DISK: case HOT_REMOVE_DISK: - case PRINT_RAID_DEBUG: case RAID_AUTORUN: case RAID_VERSION: case RESTART_ARRAY_RW: @@ -6391,18 +6196,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case RAID_VERSION: err = get_version(argp); - goto done; - - case PRINT_RAID_DEBUG: - err = 0; - md_print_devices(); - goto done; + goto out; #ifndef MODULE case RAID_AUTORUN: err = 0; autostart_arrays(arg); - goto done; + goto out; #endif default:; } @@ -6415,7 +6215,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, if (!mddev) { BUG(); - goto abort; + goto out; } /* Some actions do not requires the mutex */ @@ -6425,18 +6225,18 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -ENODEV; else err = get_array_info(mddev, argp); - goto abort; + goto out; case GET_DISK_INFO: if (!mddev->raid_disks && !mddev->external) err = -ENODEV; else err = get_disk_info(mddev, argp); - goto abort; + goto out; case SET_DISK_FAULTY: err = set_disk_faulty(mddev, new_decode_dev(arg)); - goto abort; + goto out; } if (cmd == ADD_NEW_DISK) @@ -6454,10 +6254,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, * and writes */ mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > 1) { + if (mddev->pers && atomic_read(&mddev->openers) > 1) { mutex_unlock(&mddev->open_mutex); err = -EBUSY; - goto abort; + goto out; } set_bit(MD_STILL_CLOSED, &mddev->flags); mutex_unlock(&mddev->open_mutex); @@ -6465,10 +6265,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } err = mddev_lock(mddev); if (err) { - printk(KERN_INFO + printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n", err, cmd); - goto abort; + goto out; } if (cmd == SET_ARRAY_INFO) { @@ -6477,38 +6277,38 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, memset(&info, 0, sizeof(info)); else if (copy_from_user(&info, argp, sizeof(info))) { err = -EFAULT; - goto abort_unlock; + goto unlock; } if (mddev->pers) { err = update_array_info(mddev, &info); if (err) { printk(KERN_WARNING "md: couldn't update" " array info. %d\n", err); - goto abort_unlock; + goto unlock; } - goto done_unlock; + goto unlock; } if (!list_empty(&mddev->disks)) { printk(KERN_WARNING "md: array %s already has disks!\n", mdname(mddev)); err = -EBUSY; - goto abort_unlock; + goto unlock; } if (mddev->raid_disks) { printk(KERN_WARNING "md: array %s already initialised!\n", mdname(mddev)); err = -EBUSY; - goto abort_unlock; + goto unlock; } err = set_array_info(mddev, &info); if (err) { printk(KERN_WARNING "md: couldn't set" " array info. %d\n", err); - goto abort_unlock; + goto unlock; } - goto done_unlock; + goto unlock; } /* @@ -6521,7 +6321,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE && cmd != GET_BITMAP_FILE) { err = -ENODEV; - goto abort_unlock; + goto unlock; } /* @@ -6530,23 +6330,23 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case GET_BITMAP_FILE: err = get_bitmap_file(mddev, argp); - goto done_unlock; + goto unlock; case RESTART_ARRAY_RW: err = restart_array(mddev); - goto done_unlock; + goto unlock; case STOP_ARRAY: err = do_md_stop(mddev, 0, bdev); - goto done_unlock; + goto unlock; case STOP_ARRAY_RO: err = md_set_readonly(mddev, bdev); - goto done_unlock; + goto unlock; case HOT_REMOVE_DISK: err = hot_remove_disk(mddev, new_decode_dev(arg)); - goto done_unlock; + goto unlock; case ADD_NEW_DISK: /* We can support ADD_NEW_DISK on read-only arrays @@ -6562,14 +6362,14 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, break; else err = add_new_disk(mddev, &info); - goto done_unlock; + goto unlock; } break; case BLKROSET: if (get_user(ro, (int __user *)(arg))) { err = -EFAULT; - goto done_unlock; + goto unlock; } err = -EINVAL; @@ -6577,11 +6377,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, * does not matter, no writes are coming */ if (ro) - goto done_unlock; + goto unlock; /* are we are already prepared for writes? */ if (mddev->ro != 1) - goto done_unlock; + goto unlock; /* transitioning to readauto need only happen for * arrays that call md_write_start @@ -6593,17 +6393,14 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, set_disk_ro(mddev->gendisk, 0); } } - goto done_unlock; + goto unlock; } /* * The remaining ioctls are changing the state of the * superblock, so we do not allow them on read-only arrays. - * However non-MD ioctls (e.g. get-size) will still come through - * here and hit the 'default' below, so only disallow - * 'md' ioctls, and switch to rw mode if started auto-readonly. */ - if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { + if (mddev->ro && mddev->pers) { if (mddev->ro == 2) { mddev->ro = 0; sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -6621,7 +6418,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } } else { err = -EROFS; - goto abort_unlock; + goto unlock; } } @@ -6633,38 +6430,32 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EFAULT; else err = add_new_disk(mddev, &info); - goto done_unlock; + goto unlock; } case HOT_ADD_DISK: err = hot_add_disk(mddev, new_decode_dev(arg)); - goto done_unlock; + goto unlock; case RUN_ARRAY: err = do_md_run(mddev); - goto done_unlock; + goto unlock; case SET_BITMAP_FILE: err = set_bitmap_file(mddev, (int)arg); - goto done_unlock; + goto unlock; default: err = -EINVAL; - goto abort_unlock; + goto unlock; } -done_unlock: -abort_unlock: +unlock: if (mddev->hold_active == UNTIL_IOCTL && err != -EINVAL) mddev->hold_active = 0; mddev_unlock(mddev); - - return err; -done: - if (err) - MD_BUG(); -abort: +out: return err; } #ifdef CONFIG_COMPAT @@ -6726,7 +6517,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) static void md_release(struct gendisk *disk, fmode_t mode) { - struct mddev *mddev = disk->private_data; + struct mddev *mddev = disk->private_data; BUG_ON(!mddev); atomic_dec(&mddev->openers); @@ -6761,7 +6552,7 @@ static const struct block_device_operations md_fops = .revalidate_disk= md_revalidate, }; -static int md_thread(void * arg) +static int md_thread(void *arg) { struct md_thread *thread = arg; @@ -6810,6 +6601,7 @@ void md_wakeup_thread(struct md_thread *thread) wake_up(&thread->wqueue); } } +EXPORT_SYMBOL(md_wakeup_thread); struct md_thread *md_register_thread(void (*run) (struct md_thread *), struct mddev *mddev, const char *name) @@ -6835,6 +6627,7 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *), } return thread; } +EXPORT_SYMBOL(md_register_thread); void md_unregister_thread(struct md_thread **threadp) { @@ -6852,14 +6645,10 @@ void md_unregister_thread(struct md_thread **threadp) kthread_stop(thread->tsk); kfree(thread); } +EXPORT_SYMBOL(md_unregister_thread); void md_error(struct mddev *mddev, struct md_rdev *rdev) { - if (!mddev) { - MD_BUG(); - return; - } - if (!rdev || test_bit(Faulty, &rdev->flags)) return; @@ -6876,6 +6665,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) queue_work(md_misc_wq, &mddev->event_work); md_new_event_inintr(mddev); } +EXPORT_SYMBOL(md_error); /* seq_file implementation /proc/mdstat */ @@ -6898,8 +6688,7 @@ static void status_unused(struct seq_file *seq) seq_printf(seq, "\n"); } - -static void status_resync(struct seq_file *seq, struct mddev * mddev) +static void status_resync(struct seq_file *seq, struct mddev *mddev) { sector_t max_sectors, resync, res; unsigned long dt, db; @@ -6919,13 +6708,7 @@ static void status_resync(struct seq_file *seq, struct mddev * mddev) else max_sectors = mddev->dev_sectors; - /* - * Should not happen. - */ - if (!max_sectors) { - MD_BUG(); - return; - } + WARN_ON(max_sectors == 0); /* Pick 'scale' such that (resync>>scale)*1000 will fit * in a sector_t, and (max_sectors>>scale) will fit in a * u32, as those are the requirements for sector_div. @@ -7021,7 +6804,7 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct list_head *tmp; struct mddev *next_mddev, *mddev = v; - + ++*pos; if (v == (void*)2) return NULL; @@ -7036,7 +6819,7 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) else { next_mddev = (void*)2; *pos = 0x10000; - } + } spin_unlock(&all_mddevs_lock); if (v != (void*)1) @@ -7132,7 +6915,7 @@ static int md_seq_show(struct seq_file *seq, void *v) if (mddev->pers) { mddev->pers->status(seq, mddev); - seq_printf(seq, "\n "); + seq_printf(seq, "\n "); if (mddev->pers->sync_request) { if (mddev->curr_resync > 2) { status_resync(seq, mddev); @@ -7150,7 +6933,7 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } mddev_unlock(mddev); - + return 0; } @@ -7204,12 +6987,14 @@ static const struct file_operations md_seq_fops = { int register_md_personality(struct md_personality *p) { + printk(KERN_INFO "md: %s personality registered for level %d\n", + p->name, p->level); spin_lock(&pers_lock); list_add_tail(&p->list, &pers_list); - printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level); spin_unlock(&pers_lock); return 0; } +EXPORT_SYMBOL(register_md_personality); int unregister_md_personality(struct md_personality *p) { @@ -7219,10 +7004,11 @@ int unregister_md_personality(struct md_personality *p) spin_unlock(&pers_lock); return 0; } +EXPORT_SYMBOL(unregister_md_personality); static int is_mddev_idle(struct mddev *mddev, int init) { - struct md_rdev * rdev; + struct md_rdev *rdev; int idle; int curr_events; @@ -7276,7 +7062,7 @@ void md_done_sync(struct mddev *mddev, int blocks, int ok) // stop recovery, signal do_sync .... } } - +EXPORT_SYMBOL(md_done_sync); /* md_write_start(mddev, bi) * If we need to update some array metadata (e.g. 'active' flag @@ -7317,6 +7103,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) wait_event(mddev->sb_wait, !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } +EXPORT_SYMBOL(md_write_start); void md_write_end(struct mddev *mddev) { @@ -7327,6 +7114,7 @@ void md_write_end(struct mddev *mddev) mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); } } +EXPORT_SYMBOL(md_write_end); /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes @@ -7784,6 +7572,33 @@ no_add: return spares; } +static void md_start_sync(struct work_struct *ws) +{ + struct mddev *mddev = container_of(ws, struct mddev, del_work); + + mddev->sync_thread = md_register_thread(md_do_sync, + mddev, + "resync"); + if (!mddev->sync_thread) { + printk(KERN_ERR "%s: could not start resync" + " thread...\n", + mdname(mddev)); + /* leave the spares where they are, it shouldn't hurt */ + clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); + clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); + clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + if (test_and_clear_bit(MD_RECOVERY_RECOVER, + &mddev->recovery)) + if (mddev->sysfs_action) + sysfs_notify_dirent_safe(mddev->sysfs_action); + } else + md_wakeup_thread(mddev->sync_thread); + sysfs_notify_dirent_safe(mddev->sysfs_action); + md_new_event(mddev); +} + /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -7900,7 +7715,7 @@ void md_check_recovery(struct mddev *mddev) if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) - goto unlock; + goto not_running; /* no recovery is running. * remove any failed drives, then * add spares if possible. @@ -7912,7 +7727,7 @@ void md_check_recovery(struct mddev *mddev) if (mddev->pers->check_reshape == NULL || mddev->pers->check_reshape(mddev) != 0) /* Cannot proceed */ - goto unlock; + goto not_running; set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); } else if ((spares = remove_and_add_spares(mddev, NULL))) { @@ -7925,7 +7740,7 @@ void md_check_recovery(struct mddev *mddev) clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) /* nothing to be done ... */ - goto unlock; + goto not_running; if (mddev->pers->sync_request) { if (spares) { @@ -7935,27 +7750,11 @@ void md_check_recovery(struct mddev *mddev) */ bitmap_write_all(mddev->bitmap); } - mddev->sync_thread = md_register_thread(md_do_sync, - mddev, - "resync"); - if (!mddev->sync_thread) { - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); - /* leave the spares where they are, it shouldn't hurt */ - clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - } else - md_wakeup_thread(mddev->sync_thread); - sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); + INIT_WORK(&mddev->del_work, md_start_sync); + queue_work(md_misc_wq, &mddev->del_work); + goto unlock; } - unlock: - wake_up(&mddev->sb_wait); - + not_running: if (!mddev->sync_thread) { clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); if (test_and_clear_bit(MD_RECOVERY_RECOVER, @@ -7963,9 +7762,12 @@ void md_check_recovery(struct mddev *mddev) if (mddev->sysfs_action) sysfs_notify_dirent_safe(mddev->sysfs_action); } + unlock: + wake_up(&mddev->sb_wait); mddev_unlock(mddev); } } +EXPORT_SYMBOL(md_check_recovery); void md_reap_sync_thread(struct mddev *mddev) { @@ -8008,6 +7810,7 @@ void md_reap_sync_thread(struct mddev *mddev) if (mddev->event_work.func) queue_work(md_misc_wq, &mddev->event_work); } +EXPORT_SYMBOL(md_reap_sync_thread); void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) { @@ -8641,7 +8444,6 @@ void md_autodetect_dev(dev_t dev) } } - static void autostart_arrays(int part) { struct md_rdev *rdev; @@ -8665,10 +8467,9 @@ static void autostart_arrays(int part) if (IS_ERR(rdev)) continue; - if (test_bit(Faulty, &rdev->flags)) { - MD_BUG(); + if (test_bit(Faulty, &rdev->flags)) continue; - } + set_bit(AutoDetected, &rdev->flags); list_add(&rdev->same_set, &pending_raid_disks); i_passed++; @@ -8736,20 +8537,8 @@ static int set_ro(const char *val, struct kernel_param *kp) module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); - module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); -EXPORT_SYMBOL(register_md_personality); -EXPORT_SYMBOL(unregister_md_personality); -EXPORT_SYMBOL(md_error); -EXPORT_SYMBOL(md_done_sync); -EXPORT_SYMBOL(md_write_start); -EXPORT_SYMBOL(md_write_end); -EXPORT_SYMBOL(md_register_thread); -EXPORT_SYMBOL(md_unregister_thread); -EXPORT_SYMBOL(md_wakeup_thread); -EXPORT_SYMBOL(md_check_recovery); -EXPORT_SYMBOL(md_reap_sync_thread); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); MODULE_ALIAS("md"); diff --git a/drivers/md/md.h b/drivers/md/md.h index a49d991f3fe..03cec5bdcaa 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1,15 +1,15 @@ /* md.h : kernel internal structure of the Linux MD driver Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _MD_MD_H @@ -56,7 +56,7 @@ struct md_rdev { __u64 sb_events; sector_t data_offset; /* start of data in array */ sector_t new_data_offset;/* only relevant while reshaping */ - sector_t sb_start; /* offset of the super block (in 512byte sectors) */ + sector_t sb_start; /* offset of the super block (in 512byte sectors) */ int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ @@ -239,7 +239,7 @@ struct mddev { minor_version, patch_version; int persistent; - int external; /* metadata is + int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ int chunk_sectors; @@ -248,7 +248,7 @@ struct mddev { char clevel[16]; int raid_disks; int max_disks; - sector_t dev_sectors; /* used size of + sector_t dev_sectors; /* used size of * component devices */ sector_t array_sectors; /* exported array size */ int external_size; /* size managed @@ -312,7 +312,7 @@ struct mddev { int parallel_resync; int ok_start_degraded; - /* recovery/resync flags + /* recovery/resync flags * NEEDED: we might need to start a resync/recover * RUNNING: a thread is running, or about to be started * SYNC: actually doing a resync, not a recovery @@ -392,20 +392,20 @@ struct mddev { unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. - */ + */ unsigned int safemode_delay; struct timer_list safemode_timer; - atomic_t writes_pending; + atomic_t writes_pending; struct request_queue *queue; /* for plugging ... */ - struct bitmap *bitmap; /* the bitmap for the device */ + struct bitmap *bitmap; /* the bitmap for the device */ struct { struct file *file; /* the bitmap file */ loff_t offset; /* offset from superblock of * start of bitmap. May be * negative, but not '0' * For external metadata, offset - * from start of device. + * from start of device. */ unsigned long space; /* space available at this offset */ loff_t default_offset; /* this is the offset to use when @@ -421,7 +421,7 @@ struct mddev { int external; } bitmap_info; - atomic_t max_corr_read_errors; /* max read retries */ + atomic_t max_corr_read_errors; /* max read retries */ struct list_head all_mddevs; struct attribute_group *to_remove; @@ -439,7 +439,6 @@ struct mddev { void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); }; - static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { int faulty = test_bit(Faulty, &rdev->flags); @@ -449,7 +448,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) { - atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); + atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); } struct md_personality @@ -463,7 +462,7 @@ struct md_personality int (*stop)(struct mddev *mddev); void (*status)(struct seq_file *seq, struct mddev *mddev); /* error_handler must set ->faulty and clear ->in_sync - * if appropriate, and should abort recovery if needed + * if appropriate, and should abort recovery if needed */ void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev); int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev); @@ -493,7 +492,6 @@ struct md_personality void *(*takeover) (struct mddev *mddev); }; - struct md_sysfs_entry { struct attribute attr; ssize_t (*show)(struct mddev *, char *); @@ -560,7 +558,7 @@ struct md_thread { void (*run) (struct md_thread *thread); struct mddev *mddev; wait_queue_head_t wqueue; - unsigned long flags; + unsigned long flags; struct task_struct *tsk; unsigned long timeout; void *private; @@ -594,7 +592,7 @@ extern void md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(struct mddev *mddev); -extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, +extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int rw, bool metadata_op); extern void md_do_sync(struct md_thread *thread); extern void md_new_event(struct mddev *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 849ad39f547..399272f9c04 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -31,13 +31,12 @@ #define NR_RESERVED_BUFS 32 - static int multipath_map (struct mpconf *conf) { int i, disks = conf->raid_disks; /* - * Later we do read balancing on the read side + * Later we do read balancing on the read side * now we use the first available disk. */ @@ -68,7 +67,6 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) md_wakeup_thread(mddev->thread); } - /* * multipath_end_bh_io() is called when we have finished servicing a multipathed * operation and are ready to return a success/failure code to the buffer @@ -98,8 +96,8 @@ static void multipath_end_request(struct bio *bio, int error) */ char b[BDEVNAME_SIZE]; md_error (mp_bh->mddev, rdev); - printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", - bdevname(rdev->bdev,b), + printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", + bdevname(rdev->bdev,b), (unsigned long long)bio->bi_iter.bi_sector); multipath_reschedule_retry(mp_bh); } else @@ -145,12 +143,12 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev) { struct mpconf *conf = mddev->private; int i; - + seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", - conf->multipaths[i].rdev && + conf->multipaths[i].rdev && test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_"); seq_printf (seq, "]"); } @@ -195,7 +193,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev) * first check if this is a queued request for a device * which has just failed. */ - printk(KERN_ALERT + printk(KERN_ALERT "multipath: only one IO path left and IO error.\n"); /* leave it active... it's all we have */ return; @@ -242,7 +240,6 @@ static void print_multipath_conf (struct mpconf *conf) } } - static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) { struct mpconf *conf = mddev->private; @@ -325,8 +322,6 @@ abort: return err; } - - /* * This is a kernel thread which: * @@ -356,7 +351,7 @@ static void multipathd(struct md_thread *thread) bio = &mp_bh->bio; bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector; - + if ((mp_bh->path = multipath_map (conf))<0) { printk(KERN_ALERT "multipath: %s: unrecoverable IO read" " error for block %llu\n", @@ -414,7 +409,7 @@ static int multipath_run (struct mddev *mddev) conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); mddev->private = conf; if (!conf) { - printk(KERN_ERR + printk(KERN_ERR "multipath: couldn't allocate memory for %s\n", mdname(mddev)); goto out; @@ -423,7 +418,7 @@ static int multipath_run (struct mddev *mddev) conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->multipaths) { - printk(KERN_ERR + printk(KERN_ERR "multipath: couldn't allocate memory for %s\n", mdname(mddev)); goto out_free_conf; @@ -469,7 +464,7 @@ static int multipath_run (struct mddev *mddev) conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, sizeof(struct multipath_bh)); if (conf->pool == NULL) { - printk(KERN_ERR + printk(KERN_ERR "multipath: couldn't allocate memory for %s\n", mdname(mddev)); goto out_free_conf; @@ -485,7 +480,7 @@ static int multipath_run (struct mddev *mddev) } } - printk(KERN_INFO + printk(KERN_INFO "multipath: array %s active with %d out of %d IO paths\n", mdname(mddev), conf->raid_disks - mddev->degraded, mddev->raid_disks); @@ -512,7 +507,6 @@ out: return -EIO; } - static int multipath_stop (struct mddev *mddev) { struct mpconf *conf = mddev->private; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index cf91f5910c7..ba6b85de96d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -1,10 +1,9 @@ /* raid0.c : Multiple Devices driver for Linux - Copyright (C) 1994-96 Marc ZYNGIER + Copyright (C) 1994-96 Marc ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr> - Copyright (C) 1999, 2000 Ingo Molnar, Red Hat - + Copyright (C) 1999, 2000 Ingo Molnar, Red Hat RAID-0 management functions. @@ -12,10 +11,10 @@ it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/blkdev.h> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 55de4f6f7ea..40b35be34f8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -494,7 +494,6 @@ static void raid1_end_write_request(struct bio *bio, int error) bio_put(to_put); } - /* * This routine returns the disk from which the requested read should * be done. There is a per-array 'next expected sequential IO' sector @@ -901,18 +900,18 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) * However if there are already pending * requests (preventing the barrier from * rising completely), and the - * pre-process bio queue isn't empty, + * per-process bio queue isn't empty, * then don't wait, as we need to empty - * that queue to get the nr_pending - * count down. + * that queue to allow conf->start_next_window + * to increase. */ wait_event_lock_irq(conf->wait_barrier, !conf->array_frozen && (!conf->barrier || - ((conf->start_next_window < - conf->next_resync + RESYNC_SECTORS) && - current->bio_list && - !bio_list_empty(current->bio_list))), + ((conf->start_next_window < + conf->next_resync + RESYNC_SECTORS) && + current->bio_list && + !bio_list_empty(current->bio_list))), conf->resync_lock); conf->nr_waiting--; } @@ -1001,8 +1000,7 @@ static void unfreeze_array(struct r1conf *conf) spin_unlock_irq(&conf->resync_lock); } - -/* duplicate the data pages for behind I/O +/* duplicate the data pages for behind I/O */ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio) { @@ -1471,7 +1469,6 @@ static void status(struct seq_file *seq, struct mddev *mddev) seq_printf(seq, "]"); } - static void error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; @@ -1565,7 +1562,7 @@ static int raid1_spare_active(struct mddev *mddev) unsigned long flags; /* - * Find all failed disks within the RAID1 configuration + * Find all failed disks within the RAID1 configuration * and mark them readable. * Called under mddev lock, so rcu protection not needed. */ @@ -1606,7 +1603,6 @@ static int raid1_spare_active(struct mddev *mddev) return count; } - static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) { struct r1conf *conf = mddev->private; @@ -1735,7 +1731,6 @@ abort: return err; } - static void end_sync_read(struct bio *bio, int error) { struct r1bio *r1_bio = bio->bi_private; @@ -1947,7 +1942,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) return 1; } -static int process_checks(struct r1bio *r1_bio) +static void process_checks(struct r1bio *r1_bio) { /* We have read all readable devices. If we haven't * got the block, then there is no hope left. @@ -2039,7 +2034,6 @@ static int process_checks(struct r1bio *r1_bio) bio_copy_data(sbio, pbio); } - return 0; } static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) @@ -2057,8 +2051,8 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) return; if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - if (process_checks(r1_bio) < 0) - return; + process_checks(r1_bio); + /* * schedule writes */ @@ -2458,7 +2452,6 @@ static void raid1d(struct md_thread *thread) blk_finish_plug(&plug); } - static int init_resync(struct r1conf *conf) { int buffs; @@ -2722,7 +2715,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp /* remove last page from this bio */ bio->bi_vcnt--; bio->bi_iter.bi_size -= len; - bio->bi_flags &= ~(1<< BIO_SEG_VALID); + __clear_bit(BIO_SEG_VALID, &bio->bi_flags); } goto bio_full; } @@ -2947,9 +2940,9 @@ static int run(struct mddev *mddev) printk(KERN_NOTICE "md/raid1:%s: not clean" " -- starting background reconstruction\n", mdname(mddev)); - printk(KERN_INFO + printk(KERN_INFO "md/raid1:%s: active with %d out of %d mirrors\n", - mdname(mddev), mddev->raid_disks - mddev->degraded, + mdname(mddev), mddev->raid_disks - mddev->degraded, mddev->raid_disks); /* diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 9bebca7bff2..33bda55ef9f 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -90,7 +90,6 @@ struct r1conf { */ int recovery_disabled; - /* poolinfo contains information about the content of the * mempools - it changes when the array grows or shrinks */ @@ -103,7 +102,6 @@ struct r1conf { */ struct page *tmppage; - /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6703751d87d..32e282f4c83 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -366,7 +366,6 @@ static void raid10_end_read_request(struct bio *bio, int error) struct md_rdev *rdev; struct r10conf *conf = r10_bio->mddev->private; - slot = r10_bio->read_slot; dev = r10_bio->devs[slot].devnum; rdev = r10_bio->devs[slot].rdev; @@ -1559,7 +1558,6 @@ static void make_request(struct mddev *mddev, struct bio *bio) md_write_start(mddev, bio); - do { /* @@ -1782,7 +1780,6 @@ static int raid10_spare_active(struct mddev *mddev) return count; } - static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) { struct r10conf *conf = mddev->private; @@ -1929,7 +1926,6 @@ abort: return err; } - static void end_sync_read(struct bio *bio, int error) { struct r10bio *r10_bio = bio->bi_private; @@ -2295,7 +2291,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) } } - /* * Used by fix_read_error() to decay the per rdev read_errors. * We halve the read error count for every hour that has elapsed @@ -2852,7 +2847,6 @@ static void raid10d(struct md_thread *thread) blk_finish_plug(&plug); } - static int init_resync(struct r10conf *conf) { int buffs; @@ -3388,7 +3382,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - bio2->bi_flags &= ~(1<< BIO_SEG_VALID); + __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); } goto bio_full; } @@ -3776,7 +3770,6 @@ static int run(struct mddev *mddev) blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); } - if (md_integrity_register(mddev)) goto out_free_conf; @@ -3834,6 +3827,8 @@ static int stop(struct mddev *mddev) mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); + kfree(conf->mirrors_old); + kfree(conf->mirrors_new); kfree(conf); mddev->private = NULL; return 0; @@ -4121,7 +4116,7 @@ static int raid10_start_reshape(struct mddev *mddev) memcpy(conf->mirrors_new, conf->mirrors, sizeof(struct raid10_info)*conf->prev.raid_disks); smp_mb(); - kfree(conf->mirrors_old); /* FIXME and elsewhere */ + kfree(conf->mirrors_old); conf->mirrors_old = conf->mirrors; conf->mirrors = conf->mirrors_new; conf->mirrors_new = NULL; @@ -4416,7 +4411,7 @@ read_more: read_bio->bi_end_io = end_sync_read; read_bio->bi_rw = READ; read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - read_bio->bi_flags |= 1 << BIO_UPTODATE; + __set_bit(BIO_UPTODATE, &read_bio->bi_flags); read_bio->bi_vcnt = 0; read_bio->bi_iter.bi_size = 0; r10_bio->master_bio = read_bio; @@ -4473,7 +4468,7 @@ read_more: /* Remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - bio2->bi_flags &= ~(1<<BIO_SEG_VALID); + __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); } goto bio_full; } @@ -4575,7 +4570,6 @@ static void end_reshape(struct r10conf *conf) conf->fullsync = 0; } - static int handle_reshape_read_error(struct mddev *mddev, struct r10bio *r10_bio) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9f0fbecd1eb..9c66e5997fc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -463,7 +463,6 @@ static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) hlist_add_head(&sh->hash, hp); } - /* find an idle stripe, make sure it is unhashed, and return it. */ static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) { @@ -531,9 +530,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) BUG_ON(stripe_operations_active(sh)); pr_debug("init_stripe called, stripe %llu\n", - (unsigned long long)sh->sector); - - remove_hash(sh); + (unsigned long long)sector); retry: seq = read_seqcount_begin(&conf->gen_lock); sh->generation = conf->generation - previous; @@ -542,7 +539,6 @@ retry: stripe_set_idx(sector, conf, previous, sh); sh->state = 0; - for (i = sh->disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -1350,7 +1346,6 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) } } - static void ops_complete_prexor(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; @@ -2419,7 +2414,6 @@ static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, return new_sector; } - static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) { struct r5conf *conf = sh->raid_conf; @@ -2437,7 +2431,6 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) sector_t r_sector; struct stripe_head sh2; - chunk_offset = sector_div(new_sector, sectors_per_chunk); stripe = new_sector; @@ -2541,7 +2534,6 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) return r_sector; } - static void schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int rcw, int expand) @@ -3013,7 +3005,6 @@ static void handle_stripe_fill(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } - /* handle_stripe_clean_event * any written block on an uptodate or failed drive can be returned. * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but @@ -3304,7 +3295,6 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, } } - static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, struct stripe_head_state *s, int disks) @@ -3939,7 +3929,6 @@ static void handle_stripe(struct stripe_head *sh) } } - /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { struct stripe_head *sh_src @@ -4137,7 +4126,6 @@ static int raid5_mergeable_bvec(struct request_queue *q, return max; } - static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) { sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev); @@ -4167,7 +4155,6 @@ static void add_bio_to_retry(struct bio *bi,struct r5conf *conf) md_wakeup_thread(conf->mddev->thread); } - static struct bio *remove_bio_from_retry(struct r5conf *conf) { struct bio *bi; @@ -4191,7 +4178,6 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) return bi; } - /* * The "raid5_align_endio" should check if the read succeeded and if it * did, call bio_endio on the original bio (having bio_put the new bio @@ -4224,7 +4210,6 @@ static void raid5_align_endio(struct bio *bi, int error) return; } - pr_debug("raid5_align_endio : io error...handing IO for a retry\n"); add_bio_to_retry(raid_bi, conf); @@ -4249,7 +4234,6 @@ static int bio_fits_rdev(struct bio *bi) return 1; } - static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) { struct r5conf *conf = mddev->private; @@ -4301,7 +4285,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; - align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); + __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags); if (!bio_fits_rdev(align_bi) || is_badblock(rdev, align_bi->bi_iter.bi_sector, @@ -5446,7 +5430,6 @@ raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR, raid5_show_skip_copy, raid5_store_skip_copy); - static ssize_t stripe_cache_active_show(struct mddev *mddev, char *page) { @@ -5898,7 +5881,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) return ERR_PTR(-ENOMEM); } - static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) { switch (algo) { @@ -5911,7 +5893,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded return 1; break; case ALGORITHM_PARITY_0_6: - if (raid_disk == 0 || + if (raid_disk == 0 || raid_disk == raid_disks - 1) return 1; break; @@ -6165,7 +6147,6 @@ static int run(struct mddev *mddev) "reshape"); } - /* Ok, everything is just fine now */ if (mddev->to_remove == &raid5_attrs_group) mddev->to_remove = NULL; @@ -6814,7 +6795,6 @@ static void raid5_quiesce(struct mddev *mddev, int state) } } - static void *raid45_takeover_raid0(struct mddev *mddev, int level) { struct r0conf *raid0_conf = mddev->private; @@ -6841,7 +6821,6 @@ static void *raid45_takeover_raid0(struct mddev *mddev, int level) return setup_conf(mddev); } - static void *raid5_takeover_raid1(struct mddev *mddev) { int chunksect; @@ -6902,7 +6881,6 @@ static void *raid5_takeover_raid6(struct mddev *mddev) return setup_conf(mddev); } - static int raid5_check_reshape(struct mddev *mddev) { /* For a 2-drive array, the layout and chunk size can be changed @@ -7051,7 +7029,6 @@ static void *raid6_takeover(struct mddev *mddev) return setup_conf(mddev); } - static struct md_personality raid6_personality = { .name = "raid6", diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index bc72cd4be5f..d59f5ca743c 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -155,7 +155,7 @@ */ /* - * Operations state - intermediate states that are visible outside of + * Operations state - intermediate states that are visible outside of * STRIPE_ACTIVE. * In general _idle indicates nothing is running, _run indicates a data * processing operation is active, and _result means the data processing result @@ -364,7 +364,6 @@ enum { * HANDLE gets cleared if stripe_handle leaves nothing locked. */ - struct disk_info { struct md_rdev *rdev, *replacement; }; @@ -528,7 +527,6 @@ struct r5conf { #define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */ #define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */ - /* For every RAID5 algorithm we define a RAID6 algorithm * with exactly the same layout for data and parity, and * with the Q block always on the last device (N-1). diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index f14b60080c2..e64794730e2 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -462,16 +462,12 @@ static void mic_handle_config_change(struct mic_device_desc __iomem *d, struct mic_device_ctrl __iomem *dc = (void __iomem *)d + mic_aligned_desc_size(d); struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev); - struct virtio_driver *drv; if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED) return; dev_dbg(mdrv->dev, "%s %d\n", __func__, __LINE__); - drv = container_of(mvdev->vdev.dev.driver, - struct virtio_driver, driver); - if (drv->config_changed) - drv->config_changed(&mvdev->vdev); + virtio_config_changed(&mvdev->vdev); iowrite8(1, &dc->guest_ack); } diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index feea926e32f..cfa6110632c 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -210,6 +210,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, blk_queue_prep_rq(mq->queue, mmc_prep_request); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue); if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index adfa74c1bc4..8057f52a45b 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -199,6 +199,17 @@ static int bcm47xxpart_parse(struct mtd_info *master, continue; } + /* + * New (ARM?) devices may have NVRAM in some middle block. Last + * block will be checked later, so skip it. + */ + if (offset != master->size - blocksize && + buf[0x000 / 4] == NVRAM_HEADER) { + bcm47xxpart_add_part(&parts[curr_part++], "nvram", + offset, 0); + continue; + } + /* Read middle of the block */ if (mtd_read(master, offset + 0x8000, 0x4, &bytes_read, (uint8_t *)buf) < 0) { diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 46c4643b7a0..c50d8cf0f60 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2033,6 +2033,8 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, udelay(1); } + + retries--; } /* the chip never became ready */ diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index c68868f6058..f0b0e611d1d 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_MTD_LART) += lart.o obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o obj-$(CONFIG_MTD_M25P80) += m25p80.o -obj-$(CONFIG_MTD_NAND_OMAP_BCH) += elm.o obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 91a169c44b3..21cc4b66fea 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -1697,16 +1697,16 @@ static int dbg_asicmode_show(struct seq_file *s, void *p) switch (mode) { case DOC_ASICMODE_RESET: - pos += seq_printf(s, "reset"); + pos += seq_puts(s, "reset"); break; case DOC_ASICMODE_NORMAL: - pos += seq_printf(s, "normal"); + pos += seq_puts(s, "normal"); break; case DOC_ASICMODE_POWERDOWN: - pos += seq_printf(s, "powerdown"); + pos += seq_puts(s, "powerdown"); break; } - pos += seq_printf(s, ")\n"); + pos += seq_puts(s, ")\n"); return pos; } DEBUGFS_RO_ATTR(asic_mode, dbg_asicmode_show); @@ -1745,22 +1745,22 @@ static int dbg_protection_show(struct seq_file *s, void *p) pos += seq_printf(s, "Protection = 0x%02x (", protect); if (protect & DOC_PROTECT_FOUNDRY_OTP_LOCK) - pos += seq_printf(s, "FOUNDRY_OTP_LOCK,"); + pos += seq_puts(s, "FOUNDRY_OTP_LOCK,"); if (protect & DOC_PROTECT_CUSTOMER_OTP_LOCK) - pos += seq_printf(s, "CUSTOMER_OTP_LOCK,"); + pos += seq_puts(s, "CUSTOMER_OTP_LOCK,"); if (protect & DOC_PROTECT_LOCK_INPUT) - pos += seq_printf(s, "LOCK_INPUT,"); + pos += seq_puts(s, "LOCK_INPUT,"); if (protect & DOC_PROTECT_STICKY_LOCK) - pos += seq_printf(s, "STICKY_LOCK,"); + pos += seq_puts(s, "STICKY_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ENABLED) - pos += seq_printf(s, "PROTECTION ON,"); + pos += seq_puts(s, "PROTECTION ON,"); if (protect & DOC_PROTECT_IPL_DOWNLOAD_LOCK) - pos += seq_printf(s, "IPL_DOWNLOAD_LOCK,"); + pos += seq_puts(s, "IPL_DOWNLOAD_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ERROR) - pos += seq_printf(s, "PROTECT_ERR,"); + pos += seq_puts(s, "PROTECT_ERR,"); else - pos += seq_printf(s, "NO_PROTECT_ERR"); - pos += seq_printf(s, ")\n"); + pos += seq_puts(s, "NO_PROTECT_ERR"); + pos += seq_puts(s, ")\n"); pos += seq_printf(s, "DPS0 = 0x%02x : " "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ed7e0a1bed3..dcda6287228 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -193,11 +193,14 @@ static int m25p_probe(struct spi_device *spi) { struct mtd_part_parser_data ppdata; struct flash_platform_data *data; + const struct spi_device_id *id = NULL; struct m25p *flash; struct spi_nor *nor; enum read_mode mode = SPI_NOR_NORMAL; int ret; + data = dev_get_platdata(&spi->dev); + flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL); if (!flash) return -ENOMEM; @@ -223,11 +226,26 @@ static int m25p_probe(struct spi_device *spi) mode = SPI_NOR_QUAD; else if (spi->mode & SPI_RX_DUAL) mode = SPI_NOR_DUAL; - ret = spi_nor_scan(nor, spi_get_device_id(spi), mode); + + if (data && data->name) + flash->mtd.name = data->name; + + /* For some (historical?) reason many platforms provide two different + * names in flash_platform_data: "name" and "type". Quite often name is + * set to "m25p80" and then "type" provides a real chip name. + * If that's the case, respect "type" and ignore a "name". + */ + if (data && data->type) + id = spi_nor_match_id(data->type); + + /* If we didn't get name from platform, simply use "modalias". */ + if (!id) + id = spi_get_device_id(spi); + + ret = spi_nor_scan(nor, id, mode); if (ret) return ret; - data = dev_get_platdata(&spi->dev); ppdata.of_node = spi->dev.of_node; return mtd_device_parse_register(&flash->mtd, NULL, &ppdata, diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 21b2874a303..ba801d2c6dc 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -249,7 +249,7 @@ config MTD_CFI_FLAGADM config MTD_SOLUTIONENGINE tristate "CFI Flash device mapped on Hitachi SolutionEngine" - depends on SUPERH && SOLUTION_ENGINE && MTD_CFI && MTD_REDBOOT_PARTS + depends on SOLUTION_ENGINE && MTD_CFI && MTD_REDBOOT_PARTS help This enables access to the flash chips on the Hitachi SolutionEngine and similar boards. Say 'Y' if you are building a kernel for such a board. diff --git a/drivers/mtd/maps/gpio-addr-flash.c b/drivers/mtd/maps/gpio-addr-flash.c index a4c477b9fdd..2fb346091af 100644 --- a/drivers/mtd/maps/gpio-addr-flash.c +++ b/drivers/mtd/maps/gpio-addr-flash.c @@ -99,22 +99,28 @@ static map_word gf_read(struct map_info *map, unsigned long ofs) * @from: flash offset to copy from * @len: how much to copy * - * We rely on the MTD layer to chunk up copies such that a single request here - * will not cross a window size. This allows us to only wiggle the GPIOs once - * before falling back to a normal memcpy. Reading the higher layer code shows - * that this is indeed the case, but add a BUG_ON() to future proof. + * The "from" region may straddle more than one window, so toggle the GPIOs for + * each window region before reading its data. */ static void gf_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) { struct async_state *state = gf_map_info_to_state(map); - gf_set_gpios(state, from); + int this_len; - /* BUG if operation crosses the win_size */ - BUG_ON(!((from + len) % state->win_size <= (from + len))); + while (len) { + if ((from % state->win_size) + len > state->win_size) + this_len = state->win_size - (from % state->win_size); + else + this_len = len; - /* operation does not cross the win_size, so one shot it */ - memcpy_fromio(to, map->virt + (from % state->win_size), len); + gf_set_gpios(state, from); + memcpy_fromio(to, map->virt + (from % state->win_size), + this_len); + len -= this_len; + from += this_len; + to += this_len; + } } /** @@ -147,13 +153,21 @@ static void gf_copy_to(struct map_info *map, unsigned long to, { struct async_state *state = gf_map_info_to_state(map); - gf_set_gpios(state, to); + int this_len; + + while (len) { + if ((to % state->win_size) + len > state->win_size) + this_len = state->win_size - (to % state->win_size); + else + this_len = len; - /* BUG if operation crosses the win_size */ - BUG_ON(!((to + len) % state->win_size <= (to + len))); + gf_set_gpios(state, to); + memcpy_toio(map->virt + (to % state->win_size), from, len); - /* operation does not cross the win_size, so one shot it */ - memcpy_toio(map->virt + (to % state->win_size), from, len); + len -= this_len; + to += this_len; + from += this_len; + } } static const char * const part_probe_types[] = { diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index a3cfad392ed..af747af5eee 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -89,7 +89,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) if (!pcmcia_dev_present(dev->p_dev)) { pr_debug("device removed\n"); - return 0; + return NULL; } offset = to & ~(dev->win_size-1); diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 217c25d7381..c1d21cb501c 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -103,7 +103,7 @@ static struct mtd_info *obsolete_probe(struct platform_device *dev, if (strcmp(of_probe, "ROM") != 0) dev_warn(&dev->dev, "obsolete_probe: don't know probe " "type '%s', mapping as rom\n", of_probe); - return do_map_probe("mtd_rom", map); + return do_map_probe("map_rom", map); } } @@ -340,6 +340,10 @@ static struct of_device_id of_flash_match[] = { .data = (void *)"map_ram", }, { + .compatible = "mtd-rom", + .data = (void *)"map_rom", + }, + { .type = "rom", .compatible = "direct-mapped" }, diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 43e30992a36..d08229eb44d 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -417,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) blk_queue_logical_block_size(new->rq, tr->blksize); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); if (tr->discard) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq); diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index a0f54e80670..53563955931 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -549,6 +549,9 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, if (mtd_is_partition(mtd)) return -EINVAL; + /* Sanitize user input */ + p.devname[BLKPG_DEVNAMELTH - 1] = '\0'; + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e4831b4159d..4c611871d7e 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -105,12 +105,11 @@ static LIST_HEAD(mtd_notifiers); */ static void mtd_release(struct device *dev) { - struct mtd_info __maybe_unused *mtd = dev_get_drvdata(dev); + struct mtd_info *mtd = dev_get_drvdata(dev); dev_t index = MTD_DEVT(mtd->index); - /* remove /dev/mtdXro node if needed */ - if (index) - device_destroy(&mtd_class, index + 1); + /* remove /dev/mtdXro node */ + device_destroy(&mtd_class, index + 1); } static int mtd_cls_suspend(struct device *dev, pm_message_t state) @@ -442,10 +441,8 @@ int add_mtd_device(struct mtd_info *mtd) if (device_register(&mtd->dev) != 0) goto fail_added; - if (MTD_DEVT(i)) - device_create(&mtd_class, mtd->dev.parent, - MTD_DEVT(i) + 1, - NULL, "mtd%dro", i); + device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, + "mtd%dro", i); pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing @@ -778,7 +775,7 @@ EXPORT_SYMBOL_GPL(__put_mtd_device); */ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) { - if (instr->addr > mtd->size || instr->len > mtd->size - instr->addr) + if (instr->addr >= mtd->size || instr->len > mtd->size - instr->addr) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -804,7 +801,7 @@ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, *phys = 0; if (!mtd->_point) return -EOPNOTSUPP; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -817,7 +814,7 @@ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { if (!mtd->_point) return -EOPNOTSUPP; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -835,7 +832,7 @@ unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, { if (!mtd->_get_unmapped_area) return -EOPNOTSUPP; - if (offset > mtd->size || len > mtd->size - offset) + if (offset >= mtd->size || len > mtd->size - offset) return -EINVAL; return mtd->_get_unmapped_area(mtd, len, offset, flags); } @@ -846,7 +843,7 @@ int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, { int ret_code; *retlen = 0; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -869,7 +866,7 @@ int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { *retlen = 0; - if (to < 0 || to > mtd->size || len > mtd->size - to) + if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!mtd->_write || !(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -892,7 +889,7 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, *retlen = 0; if (!mtd->_panic_write) return -EOPNOTSUPP; - if (to < 0 || to > mtd->size || len > mtd->size - to) + if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -1011,7 +1008,7 @@ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_lock) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1023,7 +1020,7 @@ int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_unlock) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1035,7 +1032,7 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_is_locked) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1045,7 +1042,7 @@ EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isreserved) return 0; @@ -1055,7 +1052,7 @@ EXPORT_SYMBOL_GPL(mtd_block_isreserved); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isbad) return 0; @@ -1067,7 +1064,7 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { if (!mtd->_block_markbad) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 8b33b26eb12..fc8b3d16cce 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -145,7 +145,7 @@ struct mtdswap_dev { struct mtdswap_oobdata { __le16 magic; __le32 count; -} __attribute__((packed)); +} __packed; #define MTDSWAP_MAGIC_CLEAN 0x2095 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) @@ -1287,7 +1287,7 @@ static int mtdswap_show(struct seq_file *s, void *data) seq_printf(s, "total erasures: %lu\n", sum); - seq_printf(s, "\n"); + seq_puts(s, "\n"); seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); @@ -1296,7 +1296,7 @@ static int mtdswap_show(struct seq_file *s, void *data) seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); - seq_printf(s, "\n"); + seq_puts(s, "\n"); seq_printf(s, "total pages: %u\n", pages); seq_printf(s, "pages mapped: %u\n", mapped); @@ -1474,7 +1474,7 @@ static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) } eblocks = mtd_div_by_eb(use_size, mtd); - use_size = eblocks * mtd->erasesize; + use_size = (uint64_t)eblocks * mtd->erasesize; bad_blocks = mtdswap_badblocks(mtd, use_size); eavailable = eblocks - bad_blocks; diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index f1cf503517f..dd10646982a 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -96,7 +96,7 @@ config MTD_NAND_OMAP2 config MTD_NAND_OMAP_BCH depends on MTD_NAND_OMAP2 - tristate "Support hardware based BCH error correction" + bool "Support hardware based BCH error correction" default n select BCH help @@ -104,7 +104,10 @@ config MTD_NAND_OMAP_BCH locate and correct errors when using BCH ECC scheme. This offloads the cpu from doing ECC error searching and correction. However some legacy OMAP families like OMAP2xxx, OMAP3xxx do not have ELM engine - so they should not enable this config symbol. + so this is optional for them. + +config MTD_NAND_OMAP_BCH_BUILD + def_tristate MTD_NAND_OMAP2 && MTD_NAND_OMAP_BCH config MTD_NAND_IDS tristate diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index a035e7cc6d4..9c847e469ca 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_MTD_NAND_NDFC) += ndfc.o obj-$(CONFIG_MTD_NAND_ATMEL) += atmel_nand.o obj-$(CONFIG_MTD_NAND_GPIO) += gpio.o obj-$(CONFIG_MTD_NAND_OMAP2) += omap2.o +obj-$(CONFIG_MTD_NAND_OMAP_BCH_BUILD) += omap_elm.o obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o obj-$(CONFIG_MTD_NAND_PXA3xx) += pxa3xx_nand.o obj-$(CONFIG_MTD_NAND_TMIO) += tmio_nand.o diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index e321c564ff0..19d1e9d17bf 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -27,6 +27,7 @@ * */ +#include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/module.h> @@ -96,6 +97,8 @@ struct atmel_nfc { bool use_nfc_sram; bool write_by_sram; + struct clk *clk; + bool is_initialized; struct completion comp_ready; struct completion comp_cmd_done; @@ -128,8 +131,6 @@ struct atmel_nand_host { u32 pmecc_lookup_table_offset_512; u32 pmecc_lookup_table_offset_1024; - int pmecc_bytes_per_sector; - int pmecc_sector_number; int pmecc_degree; /* Degree of remainders */ int pmecc_cw_len; /* Length of codeword */ @@ -841,7 +842,7 @@ static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc, pos, bit_pos, err_byte, *(buf + byte_pos)); } else { /* Bit flip in OOB area */ - tmp = sector_num * host->pmecc_bytes_per_sector + tmp = sector_num * nand_chip->ecc.bytes + (byte_pos - sector_size); err_byte = ecc[tmp]; ecc[tmp] ^= (1 << bit_pos); @@ -874,7 +875,7 @@ static int pmecc_correction(struct mtd_info *mtd, u32 pmecc_stat, uint8_t *buf, return 0; normal_check: - for (i = 0; i < host->pmecc_sector_number; i++) { + for (i = 0; i < nand_chip->ecc.steps; i++) { err_nbr = 0; if (pmecc_stat & 0x1) { buf_pos = buf + i * host->pmecc_sector_size; @@ -890,7 +891,7 @@ normal_check: return -EIO; } else { pmecc_correct_data(mtd, buf_pos, ecc, i, - host->pmecc_bytes_per_sector, err_nbr); + nand_chip->ecc.bytes, err_nbr); mtd->ecc_stats.corrected += err_nbr; total_err += err_nbr; } @@ -984,11 +985,11 @@ static int atmel_nand_pmecc_write_page(struct mtd_info *mtd, cpu_relax(); } - for (i = 0; i < host->pmecc_sector_number; i++) { - for (j = 0; j < host->pmecc_bytes_per_sector; j++) { + for (i = 0; i < chip->ecc.steps; i++) { + for (j = 0; j < chip->ecc.bytes; j++) { int pos; - pos = i * host->pmecc_bytes_per_sector + j; + pos = i * chip->ecc.bytes + j; chip->oob_poi[eccpos[pos]] = pmecc_readb_ecc_relaxed(host->ecc, i, j); } @@ -1031,7 +1032,7 @@ static void atmel_pmecc_core_init(struct mtd_info *mtd) else if (host->pmecc_sector_size == 1024) val |= PMECC_CFG_SECTOR1024; - switch (host->pmecc_sector_number) { + switch (nand_chip->ecc.steps) { case 1: val |= PMECC_CFG_PAGE_1SECTOR; break; @@ -1148,7 +1149,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, host->ecc = devm_ioremap_resource(&pdev->dev, regs); if (IS_ERR(host->ecc)) { - dev_err(host->dev, "ioremap failed\n"); err_no = PTR_ERR(host->ecc); goto err; } @@ -1156,8 +1156,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_pmerr = platform_get_resource(pdev, IORESOURCE_MEM, 2); host->pmerrloc_base = devm_ioremap_resource(&pdev->dev, regs_pmerr); if (IS_ERR(host->pmerrloc_base)) { - dev_err(host->dev, - "Can not get I/O resource for PMECC ERRLOC controller!\n"); err_no = PTR_ERR(host->pmerrloc_base); goto err; } @@ -1165,7 +1163,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3); host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom); if (IS_ERR(host->pmecc_rom_base)) { - dev_err(host->dev, "Can not get I/O resource for ROM!\n"); err_no = PTR_ERR(host->pmecc_rom_base); goto err; } @@ -1174,22 +1171,29 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, /* set ECC page size and oob layout */ switch (mtd->writesize) { + case 512: + case 1024: case 2048: + case 4096: + case 8192: + if (sector_size > mtd->writesize) { + dev_err(host->dev, "pmecc sector size is bigger than the page size!\n"); + err_no = -EINVAL; + goto err; + } + host->pmecc_degree = (sector_size == 512) ? PMECC_GF_DIMENSION_13 : PMECC_GF_DIMENSION_14; host->pmecc_cw_len = (1 << host->pmecc_degree) - 1; - host->pmecc_sector_number = mtd->writesize / sector_size; - host->pmecc_bytes_per_sector = pmecc_get_ecc_bytes( - cap, sector_size); host->pmecc_alpha_to = pmecc_get_alpha_to(host); host->pmecc_index_of = host->pmecc_rom_base + host->pmecc_lookup_table_offset; - nand_chip->ecc.steps = host->pmecc_sector_number; nand_chip->ecc.strength = cap; - nand_chip->ecc.bytes = host->pmecc_bytes_per_sector; - nand_chip->ecc.total = host->pmecc_bytes_per_sector * - host->pmecc_sector_number; + nand_chip->ecc.bytes = pmecc_get_ecc_bytes(cap, sector_size); + nand_chip->ecc.steps = mtd->writesize / sector_size; + nand_chip->ecc.total = nand_chip->ecc.bytes * + nand_chip->ecc.steps; if (nand_chip->ecc.total > mtd->oobsize - 2) { dev_err(host->dev, "No room for ECC bytes\n"); err_no = -EINVAL; @@ -1201,13 +1205,9 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, nand_chip->ecc.layout = &atmel_pmecc_oobinfo; break; - case 512: - case 1024: - case 4096: - /* TODO */ + default: dev_warn(host->dev, "Unsupported page size for PMECC, use Software ECC\n"); - default: /* page size not handled by HW ECC */ /* switching back to soft ECC */ nand_chip->ecc.mode = NAND_ECC_SOFT; @@ -1530,10 +1530,8 @@ static int atmel_hw_nand_init_params(struct platform_device *pdev, } host->ecc = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(host->ecc)) { - dev_err(host->dev, "ioremap failed\n"); + if (IS_ERR(host->ecc)) return PTR_ERR(host->ecc); - } /* ECC is calculated for the whole page (1 step) */ nand_chip->ecc.size = mtd->writesize; @@ -1907,15 +1905,7 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (offset || (data_len < mtd->writesize)) return -EINVAL; - cfg = nfc_readl(host->nfc->hsmc_regs, CFG); len = mtd->writesize; - - if (unlikely(raw)) { - len += mtd->oobsize; - nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); - } else - nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE); - /* Copy page data to sram that will write to nand via NFC */ if (use_dma) { if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0) @@ -1925,6 +1915,15 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, memcpy32_toio(sram, buf, len); } + cfg = nfc_readl(host->nfc->hsmc_regs, CFG); + if (unlikely(raw) && oob_required) { + memcpy32_toio(sram + len, chip->oob_poi, mtd->oobsize); + len += mtd->oobsize; + nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); + } else { + nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE); + } + if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc) /* * When use NFC sram, need set up PMECC before send @@ -2040,7 +2039,6 @@ static int atmel_nand_probe(struct platform_device *pdev) mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); host->io_base = devm_ioremap_resource(&pdev->dev, mem); if (IS_ERR(host->io_base)) { - dev_err(&pdev->dev, "atmel_nand: ioremap resource failed\n"); res = PTR_ERR(host->io_base); goto err_nand_ioremap; } @@ -2099,7 +2097,7 @@ static int atmel_nand_probe(struct platform_device *pdev) } nand_chip->ecc.mode = host->board.ecc_mode; - nand_chip->chip_delay = 20; /* 20us command delay time */ + nand_chip->chip_delay = 40; /* 40us command delay time */ if (host->board.bus_width_16) /* 16-bit bus width */ nand_chip->options |= NAND_BUSWIDTH_16; @@ -2248,6 +2246,7 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev) { struct atmel_nfc *nfc = &nand_nfc; struct resource *nfc_cmd_regs, *nfc_hsmc_regs, *nfc_sram; + int ret; nfc_cmd_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); nfc->base_cmd_regs = devm_ioremap_resource(&pdev->dev, nfc_cmd_regs); @@ -2279,8 +2278,28 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev) nfc_writel(nfc->hsmc_regs, IDR, 0xffffffff); nfc_readl(nfc->hsmc_regs, SR); /* clear the NFC_SR */ + nfc->clk = devm_clk_get(&pdev->dev, NULL); + if (!IS_ERR(nfc->clk)) { + ret = clk_prepare_enable(nfc->clk); + if (ret) + return ret; + } else { + dev_warn(&pdev->dev, "NFC clock missing, update your Device Tree"); + } + nfc->is_initialized = true; dev_info(&pdev->dev, "NFC is probed.\n"); + + return 0; +} + +static int atmel_nand_nfc_remove(struct platform_device *pdev) +{ + struct atmel_nfc *nfc = &nand_nfc; + + if (!IS_ERR(nfc->clk)) + clk_disable_unprepare(nfc->clk); + return 0; } @@ -2297,6 +2316,7 @@ static struct platform_driver atmel_nand_nfc_driver = { .of_match_table = of_match_ptr(atmel_nand_nfc_match), }, .probe = atmel_nand_nfc_probe, + .remove = atmel_nand_nfc_remove, }; static struct platform_driver atmel_nand_driver = { diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c index b2ab373c9ee..592befc7ffa 100644 --- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c +++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/delay.h> #include <linux/bcma/bcma.h> /* Broadcom uses 1'000'000 but it seems to be too many. Tests on WNDR4500 has @@ -23,6 +24,8 @@ #define NFLASH_SECTOR_SIZE 512 #define NCTL_CMD0 0x00010000 +#define NCTL_COL 0x00020000 /* Update column with value from BCMA_CC_NFLASH_COL_ADDR */ +#define NCTL_ROW 0x00040000 /* Update row (page) with value from BCMA_CC_NFLASH_ROW_ADDR */ #define NCTL_CMD1W 0x00080000 #define NCTL_READ 0x00100000 #define NCTL_WRITE 0x00200000 @@ -109,7 +112,7 @@ static void bcm47xxnflash_ops_bcm4706_read(struct mtd_info *mtd, uint8_t *buf, b47n->curr_page_addr); /* Prepare to read */ - ctlcode = NCTL_CSA | NCTL_CMD1W | 0x00040000 | 0x00020000 | + ctlcode = NCTL_CSA | NCTL_CMD1W | NCTL_ROW | NCTL_COL | NCTL_CMD0; ctlcode |= NAND_CMD_READSTART << 8; if (bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc, ctlcode)) @@ -167,6 +170,26 @@ static void bcm47xxnflash_ops_bcm4706_write(struct mtd_info *mtd, * NAND chip ops **************************************************/ +static void bcm47xxnflash_ops_bcm4706_cmd_ctrl(struct mtd_info *mtd, int cmd, + unsigned int ctrl) +{ + struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv; + struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv; + u32 code = 0; + + if (cmd == NAND_CMD_NONE) + return; + + if (cmd & NAND_CTRL_CLE) + code = cmd | NCTL_CMD0; + + /* nCS is not needed for reset command */ + if (cmd != NAND_CMD_RESET) + code |= NCTL_CSA; + + bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc, code); +} + /* Default nand_select_chip calls cmd_ctrl, which is not used in BCM4706 */ static void bcm47xxnflash_ops_bcm4706_select_chip(struct mtd_info *mtd, int chip) @@ -174,6 +197,14 @@ static void bcm47xxnflash_ops_bcm4706_select_chip(struct mtd_info *mtd, return; } +static int bcm47xxnflash_ops_bcm4706_dev_ready(struct mtd_info *mtd) +{ + struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv; + struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv; + + return !!(bcma_cc_read32(b47n->cc, BCMA_CC_NFLASH_CTL) & NCTL_READY); +} + /* * Default nand_command and nand_command_lp don't match BCM4706 hardware layout. * For example, reading chip id is performed in a non-standard way. @@ -198,7 +229,10 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd, switch (command) { case NAND_CMD_RESET: - pr_warn("Chip reset not implemented yet\n"); + nand_chip->cmd_ctrl(mtd, command, NAND_CTRL_CLE); + + ndelay(100); + nand_wait_ready(mtd); break; case NAND_CMD_READID: ctlcode = NCTL_CSA | 0x01000000 | NCTL_CMD1W | NCTL_CMD0; @@ -242,7 +276,7 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd, case NAND_CMD_ERASE1: bcma_cc_write32(cc, BCMA_CC_NFLASH_ROW_ADDR, b47n->curr_page_addr); - ctlcode = 0x00040000 | NCTL_CMD1W | NCTL_CMD0 | + ctlcode = NCTL_ROW | NCTL_CMD1W | NCTL_CMD0 | NAND_CMD_ERASE1 | (NAND_CMD_ERASE2 << 8); if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode)) pr_err("ERASE1 failed\n"); @@ -257,13 +291,13 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd, b47n->curr_page_addr); /* Prepare to write */ - ctlcode = 0x40000000 | 0x00040000 | 0x00020000 | 0x00010000; + ctlcode = 0x40000000 | NCTL_ROW | NCTL_COL | NCTL_CMD0; ctlcode |= NAND_CMD_SEQIN; if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode)) pr_err("SEQIN failed\n"); break; case NAND_CMD_PAGEPROG: - if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, 0x00010000 | + if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, NCTL_CMD0 | NAND_CMD_PAGEPROG)) pr_err("PAGEPROG failed\n"); if (bcm47xxnflash_ops_bcm4706_poll(cc)) @@ -341,6 +375,7 @@ static void bcm47xxnflash_ops_bcm4706_write_buf(struct mtd_info *mtd, int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n) { + struct nand_chip *nand_chip = (struct nand_chip *)&b47n->nand_chip; int err; u32 freq; u16 clock; @@ -351,10 +386,14 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n) u32 val; b47n->nand_chip.select_chip = bcm47xxnflash_ops_bcm4706_select_chip; + nand_chip->cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl; + nand_chip->dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready; b47n->nand_chip.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc; b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte; b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf; b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf; + + nand_chip->chip_delay = 50; b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH; b47n->nand_chip.ecc.mode = NAND_ECC_NONE; /* TODO: implement ECC */ @@ -364,11 +403,13 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n) /* Configure wait counters */ if (b47n->cc->status & BCMA_CC_CHIPST_4706_PKG_OPTION) { - freq = 100000000; + /* 400 MHz */ + freq = 400000000 / 4; } else { freq = bcma_chipco_pll_read(b47n->cc, 4); - freq = (freq * 0xFFF) >> 3; - freq = (freq * 25000000) >> 3; + freq = (freq & 0xFFF) >> 3; + /* Fixed reference clock 25 MHz and m = 2 */ + freq = (freq * 25000000 / 2) / 4; } clock = freq / 1000000; w0 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(15, clock); diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 0b071a3136a..b3b7ca1bafb 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -29,20 +29,23 @@ MODULE_LICENSE("GPL"); -/* We define a module parameter that allows the user to override +/* + * We define a module parameter that allows the user to override * the hardware and decide what timing mode should be used. */ #define NAND_DEFAULT_TIMINGS -1 static int onfi_timing_mode = NAND_DEFAULT_TIMINGS; module_param(onfi_timing_mode, int, S_IRUGO); -MODULE_PARM_DESC(onfi_timing_mode, "Overrides default ONFI setting." - " -1 indicates use default timings"); +MODULE_PARM_DESC(onfi_timing_mode, + "Overrides default ONFI setting. -1 indicates use default timings"); #define DENALI_NAND_NAME "denali-nand" -/* We define a macro here that combines all interrupts this driver uses into - * a single constant value, for convenience. */ +/* + * We define a macro here that combines all interrupts this driver uses into + * a single constant value, for convenience. + */ #define DENALI_IRQ_ALL (INTR_STATUS__DMA_CMD_COMP | \ INTR_STATUS__ECC_TRANSACTION_DONE | \ INTR_STATUS__ECC_ERR | \ @@ -54,26 +57,34 @@ MODULE_PARM_DESC(onfi_timing_mode, "Overrides default ONFI setting." INTR_STATUS__RST_COMP | \ INTR_STATUS__ERASE_COMP) -/* indicates whether or not the internal value for the flash bank is - * valid or not */ +/* + * indicates whether or not the internal value for the flash bank is + * valid or not + */ #define CHIP_SELECT_INVALID -1 #define SUPPORT_8BITECC 1 -/* This macro divides two integers and rounds fractional values up - * to the nearest integer value. */ +/* + * This macro divides two integers and rounds fractional values up + * to the nearest integer value. + */ #define CEIL_DIV(X, Y) (((X)%(Y)) ? ((X)/(Y)+1) : ((X)/(Y))) -/* this macro allows us to convert from an MTD structure to our own +/* + * this macro allows us to convert from an MTD structure to our own * device context (denali) structure. */ #define mtd_to_denali(m) container_of(m, struct denali_nand_info, mtd) -/* These constants are defined by the driver to enable common driver - * configuration options. */ +/* + * These constants are defined by the driver to enable common driver + * configuration options. + */ #define SPARE_ACCESS 0x41 #define MAIN_ACCESS 0x42 #define MAIN_SPARE_ACCESS 0x43 +#define PIPELINE_ACCESS 0x2000 #define DENALI_READ 0 #define DENALI_WRITE 0x100 @@ -83,8 +94,10 @@ MODULE_PARM_DESC(onfi_timing_mode, "Overrides default ONFI setting." #define ADDR_CYCLE 1 #define STATUS_CYCLE 2 -/* this is a helper macro that allows us to - * format the bank into the proper bits for the controller */ +/* + * this is a helper macro that allows us to + * format the bank into the proper bits for the controller + */ #define BANK(x) ((x) << 24) /* forward declarations */ @@ -95,12 +108,12 @@ static void denali_irq_enable(struct denali_nand_info *denali, uint32_t int_mask); static uint32_t read_interrupt_status(struct denali_nand_info *denali); -/* Certain operations for the denali NAND controller use - * an indexed mode to read/write data. The operation is - * performed by writing the address value of the command - * to the device memory followed by the data. This function +/* + * Certain operations for the denali NAND controller use an indexed mode to + * read/write data. The operation is performed by writing the address value + * of the command to the device memory followed by the data. This function * abstracts this common operation. -*/ + */ static void index_addr(struct denali_nand_info *denali, uint32_t address, uint32_t data) { @@ -116,8 +129,10 @@ static void index_addr_read_data(struct denali_nand_info *denali, *pdata = ioread32(denali->flash_mem + 0x10); } -/* We need to buffer some data for some of the NAND core routines. - * The operations manage buffering that data. */ +/* + * We need to buffer some data for some of the NAND core routines. + * The operations manage buffering that data. + */ static void reset_buf(struct denali_nand_info *denali) { denali->buf.head = denali->buf.tail = 0; @@ -131,7 +146,7 @@ static void write_byte_to_buf(struct denali_nand_info *denali, uint8_t byte) /* reads the status of the device */ static void read_status(struct denali_nand_info *denali) { - uint32_t cmd = 0x0; + uint32_t cmd; /* initialize the data buffer to store status */ reset_buf(denali); @@ -146,9 +161,8 @@ static void read_status(struct denali_nand_info *denali) /* resets a specific device connected to the core */ static void reset_bank(struct denali_nand_info *denali) { - uint32_t irq_status = 0; - uint32_t irq_mask = INTR_STATUS__RST_COMP | - INTR_STATUS__TIME_OUT; + uint32_t irq_status; + uint32_t irq_mask = INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT; clear_interrupts(denali); @@ -163,19 +177,18 @@ static void reset_bank(struct denali_nand_info *denali) /* Reset the flash controller */ static uint16_t denali_nand_reset(struct denali_nand_info *denali) { - uint32_t i; + int i; dev_dbg(denali->dev, "%s, Line %d, Function: %s\n", - __FILE__, __LINE__, __func__); + __FILE__, __LINE__, __func__); - for (i = 0 ; i < denali->max_banks; i++) + for (i = 0; i < denali->max_banks; i++) iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT, denali->flash_reg + INTR_STATUS(i)); - for (i = 0 ; i < denali->max_banks; i++) { + for (i = 0; i < denali->max_banks; i++) { iowrite32(1 << i, denali->flash_reg + DEVICE_RESET); - while (!(ioread32(denali->flash_reg + - INTR_STATUS(i)) & + while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) & (INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT))) cpu_relax(); if (ioread32(denali->flash_reg + INTR_STATUS(i)) & @@ -186,12 +199,13 @@ static uint16_t denali_nand_reset(struct denali_nand_info *denali) for (i = 0; i < denali->max_banks; i++) iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT, - denali->flash_reg + INTR_STATUS(i)); + denali->flash_reg + INTR_STATUS(i)); return PASS; } -/* this routine calculates the ONFI timing values for a given mode and +/* + * this routine calculates the ONFI timing values for a given mode and * programs the clocking register accordingly. The mode is determined by * the get_onfi_nand_para routine. */ @@ -219,7 +233,7 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, uint16_t addr_2_data, re_2_we, re_2_re, we_2_re, cs_cnt; dev_dbg(denali->dev, "%s, Line %d, Function: %s\n", - __FILE__, __LINE__, __func__); + __FILE__, __LINE__, __func__); en_lo = CEIL_DIV(Trp[mode], CLK_X); en_hi = CEIL_DIV(Treh[mode], CLK_X); @@ -239,9 +253,8 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, data_invalid_rloh = (en_lo + en_hi) * CLK_X + Trloh[mode]; - data_invalid = - data_invalid_rhoh < - data_invalid_rloh ? data_invalid_rhoh : data_invalid_rloh; + data_invalid = data_invalid_rhoh < data_invalid_rloh ? + data_invalid_rhoh : data_invalid_rloh; dv_window = data_invalid - Trea[mode]; @@ -251,12 +264,12 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, acc_clks = CEIL_DIV(Trea[mode], CLK_X); - while (((acc_clks * CLK_X) - Trea[mode]) < 3) + while (acc_clks * CLK_X - Trea[mode] < 3) acc_clks++; - if ((data_invalid - acc_clks * CLK_X) < 2) + if (data_invalid - acc_clks * CLK_X < 2) dev_warn(denali->dev, "%s, Line %d: Warning!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); addr_2_data = CEIL_DIV(Tadl[mode], CLK_X); re_2_we = CEIL_DIV(Trhw[mode], CLK_X); @@ -269,7 +282,7 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, cs_cnt = 1; if (Tcea[mode]) { - while (((cs_cnt * CLK_X) + Trea[mode]) < Tcea[mode]) + while (cs_cnt * CLK_X + Trea[mode] < Tcea[mode]) cs_cnt++; } @@ -279,8 +292,8 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, #endif /* Sighting 3462430: Temporary hack for MT29F128G08CJABAWP:B */ - if ((ioread32(denali->flash_reg + MANUFACTURER_ID) == 0) && - (ioread32(denali->flash_reg + DEVICE_ID) == 0x88)) + if (ioread32(denali->flash_reg + MANUFACTURER_ID) == 0 && + ioread32(denali->flash_reg + DEVICE_ID) == 0x88) acc_clks = 6; iowrite32(acc_clks, denali->flash_reg + ACC_CLKS); @@ -297,9 +310,11 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali, static uint16_t get_onfi_nand_para(struct denali_nand_info *denali) { int i; - /* we needn't to do a reset here because driver has already + + /* + * we needn't to do a reset here because driver has already * reset all the banks before - * */ + */ if (!(ioread32(denali->flash_reg + ONFI_TIMING_MODE) & ONFI_TIMING_MODE__VALUE)) return FAIL; @@ -312,8 +327,10 @@ static uint16_t get_onfi_nand_para(struct denali_nand_info *denali) nand_onfi_timing_set(denali, i); - /* By now, all the ONFI devices we know support the page cache */ - /* rw feature. So here we enable the pipeline_rw_ahead feature */ + /* + * By now, all the ONFI devices we know support the page cache + * rw feature. So here we enable the pipeline_rw_ahead feature + */ /* iowrite32(1, denali->flash_reg + CACHE_WRITE_ENABLE); */ /* iowrite32(1, denali->flash_reg + CACHE_READ_ENABLE); */ @@ -339,8 +356,10 @@ static void get_toshiba_nand_para(struct denali_nand_info *denali) { uint32_t tmp; - /* Workaround to fix a controller bug which reports a wrong */ - /* spare area size for some kind of Toshiba NAND device */ + /* + * Workaround to fix a controller bug which reports a wrong + * spare area size for some kind of Toshiba NAND device + */ if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) && (ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64)) { iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE); @@ -384,13 +403,14 @@ static void get_hynix_nand_para(struct denali_nand_info *denali, break; default: dev_warn(denali->dev, - "Spectra: Unknown Hynix NAND (Device ID: 0x%x)." - "Will use default parameter values instead.\n", - device_id); + "Spectra: Unknown Hynix NAND (Device ID: 0x%x).\n" + "Will use default parameter values instead.\n", + device_id); } } -/* determines how many NAND chips are connected to the controller. Note for +/* + * determines how many NAND chips are connected to the controller. Note for * Intel CE4100 devices we don't support more than one device. */ static void find_valid_banks(struct denali_nand_info *denali) @@ -400,10 +420,9 @@ static void find_valid_banks(struct denali_nand_info *denali) denali->total_used_banks = 1; for (i = 0; i < denali->max_banks; i++) { - index_addr(denali, (uint32_t)(MODE_11 | (i << 24) | 0), 0x90); - index_addr(denali, (uint32_t)(MODE_11 | (i << 24) | 1), 0); - index_addr_read_data(denali, - (uint32_t)(MODE_11 | (i << 24) | 2), &id[i]); + index_addr(denali, MODE_11 | (i << 24) | 0, 0x90); + index_addr(denali, MODE_11 | (i << 24) | 1, 0); + index_addr_read_data(denali, MODE_11 | (i << 24) | 2, &id[i]); dev_dbg(denali->dev, "Return 1st ID for bank[%d]: %x\n", i, id[i]); @@ -420,14 +439,14 @@ static void find_valid_banks(struct denali_nand_info *denali) } if (denali->platform == INTEL_CE4100) { - /* Platform limitations of the CE4100 device limit + /* + * Platform limitations of the CE4100 device limit * users to a single chip solution for NAND. * Multichip support is not enabled. */ if (denali->total_used_banks != 1) { dev_err(denali->dev, - "Sorry, Intel CE4100 only supports " - "a single NAND device.\n"); + "Sorry, Intel CE4100 only supports a single NAND device.\n"); BUG(); } } @@ -448,12 +467,13 @@ static void detect_max_banks(struct denali_nand_info *denali) static void detect_partition_feature(struct denali_nand_info *denali) { - /* For MRST platform, denali->fwblks represent the + /* + * For MRST platform, denali->fwblks represent the * number of blocks firmware is taken, * FW is in protect partition and MTD driver has no * permission to access it. So let driver know how many * blocks it can't touch. - * */ + */ if (ioread32(denali->flash_reg + FEATURES) & FEATURES__PARTITION) { if ((ioread32(denali->flash_reg + PERM_SRC_ID(1)) & PERM_SRC_ID__SRCID) == SPECTRA_PARTITION_ID) { @@ -464,30 +484,32 @@ static void detect_partition_feature(struct denali_nand_info *denali) + (ioread32(denali->flash_reg + MIN_BLK_ADDR(1)) & MIN_BLK_ADDR__VALUE); - } else + } else { denali->fwblks = SPECTRA_START_BLOCK; - } else + } + } else { denali->fwblks = SPECTRA_START_BLOCK; + } } static uint16_t denali_nand_timing_set(struct denali_nand_info *denali) { uint16_t status = PASS; uint32_t id_bytes[8], addr; - uint8_t i, maf_id, device_id; + uint8_t maf_id, device_id; + int i; - dev_dbg(denali->dev, - "%s, Line %d, Function: %s\n", + dev_dbg(denali->dev, "%s, Line %d, Function: %s\n", __FILE__, __LINE__, __func__); - /* Use read id method to get device ID and other - * params. For some NAND chips, controller can't - * report the correct device ID by reading from - * DEVICE_ID register - * */ - addr = (uint32_t)MODE_11 | BANK(denali->flash_bank); - index_addr(denali, (uint32_t)addr | 0, 0x90); - index_addr(denali, (uint32_t)addr | 1, 0); + /* + * Use read id method to get device ID and other params. + * For some NAND chips, controller can't report the correct + * device ID by reading from DEVICE_ID register + */ + addr = MODE_11 | BANK(denali->flash_bank); + index_addr(denali, addr | 0, 0x90); + index_addr(denali, addr | 1, 0); for (i = 0; i < 8; i++) index_addr_read_data(denali, addr | 2, &id_bytes[i]); maf_id = id_bytes[0]; @@ -506,7 +528,7 @@ static uint16_t denali_nand_timing_set(struct denali_nand_info *denali) } dev_info(denali->dev, - "Dump timing register values:" + "Dump timing register values:\n" "acc_clks: %d, re_2_we: %d, re_2_re: %d\n" "we_2_re: %d, addr_2_data: %d, rdwr_en_lo_cnt: %d\n" "rdwr_en_hi_cnt: %d, cs_setup_cnt: %d\n", @@ -523,7 +545,8 @@ static uint16_t denali_nand_timing_set(struct denali_nand_info *denali) detect_partition_feature(denali); - /* If the user specified to override the default timings + /* + * If the user specified to override the default timings * with a specific ONFI mode, we apply those changes here. */ if (onfi_timing_mode != NAND_DEFAULT_TIMINGS) @@ -536,7 +559,7 @@ static void denali_set_intr_modes(struct denali_nand_info *denali, uint16_t INT_ENABLE) { dev_dbg(denali->dev, "%s, Line %d, Function: %s\n", - __FILE__, __LINE__, __func__); + __FILE__, __LINE__, __func__); if (INT_ENABLE) iowrite32(1, denali->flash_reg + GLOBAL_INT_ENABLE); @@ -544,17 +567,18 @@ static void denali_set_intr_modes(struct denali_nand_info *denali, iowrite32(0, denali->flash_reg + GLOBAL_INT_ENABLE); } -/* validation function to verify that the controlling software is making +/* + * validation function to verify that the controlling software is making * a valid request */ static inline bool is_flash_bank_valid(int flash_bank) { - return (flash_bank >= 0 && flash_bank < 4); + return flash_bank >= 0 && flash_bank < 4; } static void denali_irq_init(struct denali_nand_info *denali) { - uint32_t int_mask = 0; + uint32_t int_mask; int i; /* Disable global interrupts */ @@ -584,7 +608,8 @@ static void denali_irq_enable(struct denali_nand_info *denali, iowrite32(int_mask, denali->flash_reg + INTR_EN(i)); } -/* This function only returns when an interrupt that this driver cares about +/* + * This function only returns when an interrupt that this driver cares about * occurs. This is to reduce the overhead of servicing interrupts */ static inline uint32_t denali_irq_detected(struct denali_nand_info *denali) @@ -596,7 +621,7 @@ static inline uint32_t denali_irq_detected(struct denali_nand_info *denali) static inline void clear_interrupt(struct denali_nand_info *denali, uint32_t irq_mask) { - uint32_t intr_status_reg = 0; + uint32_t intr_status_reg; intr_status_reg = INTR_STATUS(denali->flash_bank); @@ -605,7 +630,8 @@ static inline void clear_interrupt(struct denali_nand_info *denali, static void clear_interrupts(struct denali_nand_info *denali) { - uint32_t status = 0x0; + uint32_t status; + spin_lock_irq(&denali->irq_lock); status = read_interrupt_status(denali); @@ -617,38 +643,40 @@ static void clear_interrupts(struct denali_nand_info *denali) static uint32_t read_interrupt_status(struct denali_nand_info *denali) { - uint32_t intr_status_reg = 0; + uint32_t intr_status_reg; intr_status_reg = INTR_STATUS(denali->flash_bank); return ioread32(denali->flash_reg + intr_status_reg); } -/* This is the interrupt service routine. It handles all interrupts - * sent to this device. Note that on CE4100, this is a shared - * interrupt. +/* + * This is the interrupt service routine. It handles all interrupts + * sent to this device. Note that on CE4100, this is a shared interrupt. */ static irqreturn_t denali_isr(int irq, void *dev_id) { struct denali_nand_info *denali = dev_id; - uint32_t irq_status = 0x0; + uint32_t irq_status; irqreturn_t result = IRQ_NONE; spin_lock(&denali->irq_lock); - /* check to see if a valid NAND chip has - * been selected. - */ + /* check to see if a valid NAND chip has been selected. */ if (is_flash_bank_valid(denali->flash_bank)) { - /* check to see if controller generated - * the interrupt, since this is a shared interrupt */ + /* + * check to see if controller generated the interrupt, + * since this is a shared interrupt + */ irq_status = denali_irq_detected(denali); if (irq_status != 0) { /* handle interrupt */ /* first acknowledge it */ clear_interrupt(denali, irq_status); - /* store the status in the device context for someone - to read */ + /* + * store the status in the device context for someone + * to read + */ denali->irq_status |= irq_status; /* notify anyone who cares that it happened */ complete(&denali->complete); @@ -663,9 +691,8 @@ static irqreturn_t denali_isr(int irq, void *dev_id) static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask) { - unsigned long comp_res = 0; - uint32_t intr_status = 0; - bool retry = false; + unsigned long comp_res; + uint32_t intr_status; unsigned long timeout = msecs_to_jiffies(1000); do { @@ -679,12 +706,13 @@ static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask) spin_unlock_irq(&denali->irq_lock); /* our interrupt was detected */ break; - } else { - /* these are not the interrupts you are looking for - - * need to wait again */ - spin_unlock_irq(&denali->irq_lock); - retry = true; } + + /* + * these are not the interrupts you are looking for - + * need to wait again + */ + spin_unlock_irq(&denali->irq_lock); } while (comp_res != 0); if (comp_res == 0) { @@ -697,12 +725,14 @@ static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask) return intr_status; } -/* This helper function setups the registers for ECC and whether or not - * the spare area will be transferred. */ +/* + * This helper function setups the registers for ECC and whether or not + * the spare area will be transferred. + */ static void setup_ecc_for_xfer(struct denali_nand_info *denali, bool ecc_en, bool transfer_spare) { - int ecc_en_flag = 0, transfer_spare_flag = 0; + int ecc_en_flag, transfer_spare_flag; /* set ECC, transfer spare bits if needed */ ecc_en_flag = ecc_en ? ECC_ENABLE__FLAG : 0; @@ -710,22 +740,20 @@ static void setup_ecc_for_xfer(struct denali_nand_info *denali, bool ecc_en, /* Enable spare area/ECC per user's request. */ iowrite32(ecc_en_flag, denali->flash_reg + ECC_ENABLE); - iowrite32(transfer_spare_flag, - denali->flash_reg + TRANSFER_SPARE_REG); + iowrite32(transfer_spare_flag, denali->flash_reg + TRANSFER_SPARE_REG); } -/* sends a pipeline command operation to the controller. See the Denali NAND +/* + * sends a pipeline command operation to the controller. See the Denali NAND * controller's user guide for more information (section 4.2.3.6). */ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, - bool ecc_en, - bool transfer_spare, - int access_type, - int op) + bool ecc_en, bool transfer_spare, + int access_type, int op) { int status = PASS; - uint32_t addr = 0x0, cmd = 0x0, page_count = 1, irq_status = 0, - irq_mask = 0; + uint32_t page_count = 1; + uint32_t addr, cmd, irq_status, irq_mask; if (op == DENALI_READ) irq_mask = INTR_STATUS__LOAD_COMP; @@ -736,7 +764,6 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, setup_ecc_for_xfer(denali, ecc_en, transfer_spare); - /* clear interrupts */ clear_interrupts(denali); addr = BANK(denali->flash_bank) | denali->page; @@ -747,37 +774,38 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, } else if (op == DENALI_WRITE && access_type == SPARE_ACCESS) { /* read spare area */ cmd = MODE_10 | addr; - index_addr(denali, (uint32_t)cmd, access_type); + index_addr(denali, cmd, access_type); cmd = MODE_01 | addr; iowrite32(cmd, denali->flash_mem); } else if (op == DENALI_READ) { /* setup page read request for access type */ cmd = MODE_10 | addr; - index_addr(denali, (uint32_t)cmd, access_type); + index_addr(denali, cmd, access_type); - /* page 33 of the NAND controller spec indicates we should not - use the pipeline commands in Spare area only mode. So we - don't. + /* + * page 33 of the NAND controller spec indicates we should not + * use the pipeline commands in Spare area only mode. + * So we don't. */ if (access_type == SPARE_ACCESS) { cmd = MODE_01 | addr; iowrite32(cmd, denali->flash_mem); } else { - index_addr(denali, (uint32_t)cmd, - 0x2000 | op | page_count); + index_addr(denali, cmd, + PIPELINE_ACCESS | op | page_count); - /* wait for command to be accepted + /* + * wait for command to be accepted * can always use status0 bit as the - * mask is identical for each - * bank. */ + * mask is identical for each bank. + */ irq_status = wait_for_irq(denali, irq_mask); if (irq_status == 0) { dev_err(denali->dev, - "cmd, page, addr on timeout " - "(0x%x, 0x%x, 0x%x)\n", - cmd, denali->page, addr); + "cmd, page, addr on timeout (0x%x, 0x%x, 0x%x)\n", + cmd, denali->page, addr); status = FAIL; } else { cmd = MODE_01 | addr; @@ -790,51 +818,51 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, /* helper function that simply writes a buffer to the flash */ static int write_data_to_flash_mem(struct denali_nand_info *denali, - const uint8_t *buf, - int len) + const uint8_t *buf, int len) { - uint32_t i = 0, *buf32; + uint32_t *buf32; + int i; - /* verify that the len is a multiple of 4. see comment in - * read_data_from_flash_mem() */ + /* + * verify that the len is a multiple of 4. + * see comment in read_data_from_flash_mem() + */ BUG_ON((len % 4) != 0); /* write the data to the flash memory */ buf32 = (uint32_t *)buf; for (i = 0; i < len / 4; i++) iowrite32(*buf32++, denali->flash_mem + 0x10); - return i*4; /* intent is to return the number of bytes read */ + return i * 4; /* intent is to return the number of bytes read */ } /* helper function that simply reads a buffer from the flash */ static int read_data_from_flash_mem(struct denali_nand_info *denali, - uint8_t *buf, - int len) + uint8_t *buf, int len) { - uint32_t i = 0, *buf32; - - /* we assume that len will be a multiple of 4, if not - * it would be nice to know about it ASAP rather than - * have random failures... - * This assumption is based on the fact that this - * function is designed to be used to read flash pages, - * which are typically multiples of 4... - */ + uint32_t *buf32; + int i; + /* + * we assume that len will be a multiple of 4, if not it would be nice + * to know about it ASAP rather than have random failures... + * This assumption is based on the fact that this function is designed + * to be used to read flash pages, which are typically multiples of 4. + */ BUG_ON((len % 4) != 0); /* transfer the data from the flash */ buf32 = (uint32_t *)buf; for (i = 0; i < len / 4; i++) *buf32++ = ioread32(denali->flash_mem + 0x10); - return i*4; /* intent is to return the number of bytes read */ + return i * 4; /* intent is to return the number of bytes read */ } /* writes OOB data to the device */ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); - uint32_t irq_status = 0; + uint32_t irq_status; uint32_t irq_mask = INTR_STATUS__PROGRAM_COMP | INTR_STATUS__PROGRAM_FAIL; int status = 0; @@ -863,8 +891,8 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page) static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); - uint32_t irq_mask = INTR_STATUS__LOAD_COMP, - irq_status = 0, addr = 0x0, cmd = 0x0; + uint32_t irq_mask = INTR_STATUS__LOAD_COMP; + uint32_t irq_status, addr, cmd; denali->page = page; @@ -872,16 +900,19 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page) DENALI_READ) == PASS) { read_data_from_flash_mem(denali, buf, mtd->oobsize); - /* wait for command to be accepted - * can always use status0 bit as the mask is identical for each - * bank. */ + /* + * wait for command to be accepted + * can always use status0 bit as the + * mask is identical for each bank. + */ irq_status = wait_for_irq(denali, irq_mask); if (irq_status == 0) dev_err(denali->dev, "page on OOB timeout %d\n", denali->page); - /* We set the device back to MAIN_ACCESS here as I observed + /* + * We set the device back to MAIN_ACCESS here as I observed * instability with the controller if you do a block erase * and the last transaction was a SPARE_ACCESS. Block erase * is reliable (according to the MTD test infrastructure) @@ -889,16 +920,18 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page) */ addr = BANK(denali->flash_bank) | denali->page; cmd = MODE_10 | addr; - index_addr(denali, (uint32_t)cmd, MAIN_ACCESS); + index_addr(denali, cmd, MAIN_ACCESS); } } -/* this function examines buffers to see if they contain data that +/* + * this function examines buffers to see if they contain data that * indicate that the buffer is part of an erased region of flash. */ static bool is_erased(uint8_t *buf, int len) { - int i = 0; + int i; + for (i = 0; i < len; i++) if (buf[i] != 0xFF) return false; @@ -921,9 +954,8 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf, if (irq_status & INTR_STATUS__ECC_ERR) { /* read the ECC errors. we'll ignore them for now */ - uint32_t err_address = 0, err_correction_info = 0; - uint32_t err_byte = 0, err_sector = 0, err_device = 0; - uint32_t err_correction_value = 0; + uint32_t err_address, err_correction_info, err_byte, + err_sector, err_device, err_correction_value; denali_set_intr_modes(denali, false); do { @@ -939,15 +971,17 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf, err_device = ECC_ERR_DEVICE(err_correction_info); if (ECC_ERROR_CORRECTABLE(err_correction_info)) { - /* If err_byte is larger than ECC_SECTOR_SIZE, + /* + * If err_byte is larger than ECC_SECTOR_SIZE, * means error happened in OOB, so we ignore * it. It's no need for us to correct it * err_device is represented the NAND error * bits are happened in if there are more * than one NAND connected. - * */ + */ if (err_byte < ECC_SECTOR_SIZE) { int offset; + offset = (err_sector * ECC_SECTOR_SIZE + err_byte) * @@ -959,17 +993,19 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf, bitflips++; } } else { - /* if the error is not correctable, need to + /* + * if the error is not correctable, need to * look at the page to see if it is an erased * page. if so, then it's not a real ECC error - * */ + */ check_erased_page = true; } } while (!ECC_LAST_ERR(err_correction_info)); - /* Once handle all ecc errors, controller will triger + /* + * Once handle all ecc errors, controller will triger * a ECC_TRANSACTION_DONE interrupt, so here just wait * for a while for this interrupt - * */ + */ while (!(read_interrupt_status(denali) & INTR_STATUS__ECC_TRANSACTION_DONE)) cpu_relax(); @@ -983,21 +1019,16 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf, /* programs the controller to either enable/disable DMA transfers */ static void denali_enable_dma(struct denali_nand_info *denali, bool en) { - uint32_t reg_val = 0x0; - - if (en) - reg_val = DMA_ENABLE__FLAG; - - iowrite32(reg_val, denali->flash_reg + DMA_ENABLE); + iowrite32(en ? DMA_ENABLE__FLAG : 0, denali->flash_reg + DMA_ENABLE); ioread32(denali->flash_reg + DMA_ENABLE); } /* setups the HW to perform the data DMA */ static void denali_setup_dma(struct denali_nand_info *denali, int op) { - uint32_t mode = 0x0; + uint32_t mode; const int page_count = 1; - dma_addr_t addr = denali->buf.dma_buf; + uint32_t addr = denali->buf.dma_buf; mode = MODE_10 | BANK(denali->flash_bank); @@ -1007,31 +1038,31 @@ static void denali_setup_dma(struct denali_nand_info *denali, int op) index_addr(denali, mode | denali->page, 0x2000 | op | page_count); /* 2. set memory high address bits 23:8 */ - index_addr(denali, mode | ((uint16_t)(addr >> 16) << 8), 0x2200); + index_addr(denali, mode | ((addr >> 16) << 8), 0x2200); /* 3. set memory low address bits 23:8 */ - index_addr(denali, mode | ((uint16_t)addr << 8), 0x2300); + index_addr(denali, mode | ((addr & 0xff) << 8), 0x2300); - /* 4. interrupt when complete, burst len = 64 bytes*/ + /* 4. interrupt when complete, burst len = 64 bytes */ index_addr(denali, mode | 0x14000, 0x2400); } -/* writes a page. user specifies type, and this function handles the - * configuration details. */ +/* + * writes a page. user specifies type, and this function handles the + * configuration details. + */ static int write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, bool raw_xfer) { struct denali_nand_info *denali = mtd_to_denali(mtd); - dma_addr_t addr = denali->buf.dma_buf; size_t size = denali->mtd.writesize + denali->mtd.oobsize; - - uint32_t irq_status = 0; + uint32_t irq_status; uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP | INTR_STATUS__PROGRAM_FAIL; - /* if it is a raw xfer, we want to disable ecc, and send - * the spare area. + /* + * if it is a raw xfer, we want to disable ecc and send the spare area. * !raw_xfer - enable ecc * raw_xfer - transfer spare */ @@ -1058,12 +1089,9 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip, irq_status = wait_for_irq(denali, irq_mask); if (irq_status == 0) { - dev_err(denali->dev, - "timeout on write_page (type = %d)\n", - raw_xfer); - denali->status = - (irq_status & INTR_STATUS__PROGRAM_FAIL) ? - NAND_STATUS_FAIL : PASS; + dev_err(denali->dev, "timeout on write_page (type = %d)\n", + raw_xfer); + denali->status = NAND_STATUS_FAIL; } denali_enable_dma(denali, false); @@ -1074,27 +1102,33 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip, /* NAND core entry points */ -/* this is the callback that the NAND core calls to write a page. Since +/* + * this is the callback that the NAND core calls to write a page. Since * writing a page with ECC or without is similar, all the work is done * by write_page above. - * */ + */ static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required) { - /* for regular page writes, we let HW handle all the ECC - * data written to the device. */ + /* + * for regular page writes, we let HW handle all the ECC + * data written to the device. + */ return write_page(mtd, chip, buf, false); } -/* This is the callback that the NAND core calls to write a page without ECC. +/* + * This is the callback that the NAND core calls to write a page without ECC. * raw access is similar to ECC page writes, so all the work is done in the * write_page() function above. */ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required) { - /* for raw page writes, we want to disable ECC and simply write - whatever data is in the buffer. */ + /* + * for raw page writes, we want to disable ECC and simply write + * whatever data is in the buffer. + */ return write_page(mtd, chip, buf, true); } @@ -1121,15 +1155,15 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip, dma_addr_t addr = denali->buf.dma_buf; size_t size = denali->mtd.writesize + denali->mtd.oobsize; - uint32_t irq_status = 0; + uint32_t irq_status; uint32_t irq_mask = INTR_STATUS__ECC_TRANSACTION_DONE | INTR_STATUS__ECC_ERR; bool check_erased_page = false; if (page != denali->page) { - dev_err(denali->dev, "IN %s: page %d is not" - " equal to denali->page %d, investigate!!", - __func__, page, denali->page); + dev_err(denali->dev, + "IN %s: page %d is not equal to denali->page %d", + __func__, page, denali->page); BUG(); } @@ -1169,17 +1203,14 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); - dma_addr_t addr = denali->buf.dma_buf; size_t size = denali->mtd.writesize + denali->mtd.oobsize; - - uint32_t irq_status = 0; uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP; if (page != denali->page) { - dev_err(denali->dev, "IN %s: page %d is not" - " equal to denali->page %d, investigate!!", - __func__, page, denali->page); + dev_err(denali->dev, + "IN %s: page %d is not equal to denali->page %d", + __func__, page, denali->page); BUG(); } @@ -1192,7 +1223,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, denali_setup_dma(denali, DENALI_READ); /* wait for operation to complete */ - irq_status = wait_for_irq(denali, irq_mask); + wait_for_irq(denali, irq_mask); dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE); @@ -1228,6 +1259,7 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip) { struct denali_nand_info *denali = mtd_to_denali(mtd); int status = denali->status; + denali->status = 0; return status; @@ -1237,20 +1269,19 @@ static int denali_erase(struct mtd_info *mtd, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); - uint32_t cmd = 0x0, irq_status = 0; + uint32_t cmd, irq_status; - /* clear interrupts */ clear_interrupts(denali); /* setup page read request for access type */ cmd = MODE_10 | BANK(denali->flash_bank) | page; - index_addr(denali, (uint32_t)cmd, 0x1); + index_addr(denali, cmd, 0x1); /* wait for erase to complete or failure to occur */ irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP | INTR_STATUS__ERASE_FAIL); - return (irq_status & INTR_STATUS__ERASE_FAIL) ? NAND_STATUS_FAIL : PASS; + return irq_status & INTR_STATUS__ERASE_FAIL ? NAND_STATUS_FAIL : PASS; } static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col, @@ -1269,17 +1300,16 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col, case NAND_CMD_READID: case NAND_CMD_PARAM: reset_buf(denali); - /*sometimes ManufactureId read from register is not right + /* + * sometimes ManufactureId read from register is not right * e.g. some of Micron MT29F32G08QAA MLC NAND chips * So here we send READID cmd to NAND insteand - * */ - addr = (uint32_t)MODE_11 | BANK(denali->flash_bank); - index_addr(denali, (uint32_t)addr | 0, 0x90); - index_addr(denali, (uint32_t)addr | 1, 0); + */ + addr = MODE_11 | BANK(denali->flash_bank); + index_addr(denali, addr | 0, 0x90); + index_addr(denali, addr | 1, 0); for (i = 0; i < 8; i++) { - index_addr_read_data(denali, - (uint32_t)addr | 2, - &id); + index_addr_read_data(denali, addr | 2, &id); write_byte_to_buf(denali, id); } break; @@ -1304,8 +1334,8 @@ static int denali_ecc_calculate(struct mtd_info *mtd, const uint8_t *data, uint8_t *ecc_code) { struct denali_nand_info *denali = mtd_to_denali(mtd); - dev_err(denali->dev, - "denali_ecc_calculate called unexpectedly\n"); + + dev_err(denali->dev, "denali_ecc_calculate called unexpectedly\n"); BUG(); return -EIO; } @@ -1314,8 +1344,8 @@ static int denali_ecc_correct(struct mtd_info *mtd, uint8_t *data, uint8_t *read_ecc, uint8_t *calc_ecc) { struct denali_nand_info *denali = mtd_to_denali(mtd); - dev_err(denali->dev, - "denali_ecc_correct called unexpectedly\n"); + + dev_err(denali->dev, "denali_ecc_correct called unexpectedly\n"); BUG(); return -EIO; } @@ -1323,8 +1353,8 @@ static int denali_ecc_correct(struct mtd_info *mtd, uint8_t *data, static void denali_ecc_hwctl(struct mtd_info *mtd, int mode) { struct denali_nand_info *denali = mtd_to_denali(mtd); - dev_err(denali->dev, - "denali_ecc_hwctl called unexpectedly\n"); + + dev_err(denali->dev, "denali_ecc_hwctl called unexpectedly\n"); BUG(); } /* end NAND core entry points */ @@ -1332,11 +1362,12 @@ static void denali_ecc_hwctl(struct mtd_info *mtd, int mode) /* Initialization code to bring the device up to a known good state */ static void denali_hw_init(struct denali_nand_info *denali) { - /* tell driver how many bit controller will skip before + /* + * tell driver how many bit controller will skip before * writing ECC code in OOB, this register may be already * set by firmware. So we read this value out. * if this value is 0, just let it be. - * */ + */ denali->bbtskipbytes = ioread32(denali->flash_reg + SPARE_AREA_SKIP_BYTES); detect_max_banks(denali); @@ -1354,10 +1385,11 @@ static void denali_hw_init(struct denali_nand_info *denali) denali_irq_init(denali); } -/* Althogh controller spec said SLC ECC is forceb to be 4bit, +/* + * Althogh controller spec said SLC ECC is forceb to be 4bit, * but denali controller in MRST only support 15bit and 8bit ECC * correction - * */ + */ #define ECC_8BITS 14 static struct nand_ecclayout nand_8bit_oob = { .eccbytes = 14, @@ -1397,13 +1429,16 @@ static void denali_drv_init(struct denali_nand_info *denali) denali->idx = 0; /* setup interrupt handler */ - /* the completion object will be used to notify - * the callee that the interrupt is done */ + /* + * the completion object will be used to notify + * the callee that the interrupt is done + */ init_completion(&denali->complete); - /* the spinlock will be used to synchronize the ISR - * with any element that might be access shared - * data (interrupt status) */ + /* + * the spinlock will be used to synchronize the ISR with any + * element that might be access shared data (interrupt status) + */ spin_lock_init(&denali->irq_lock); /* indicate that MTD has not selected a valid bank yet */ @@ -1418,7 +1453,8 @@ int denali_init(struct denali_nand_info *denali) int ret; if (denali->platform == INTEL_CE4100) { - /* Due to a silicon limitation, we can only support + /* + * Due to a silicon limitation, we can only support * ONFI timing mode 1 and below. */ if (onfi_timing_mode < -1 || onfi_timing_mode > 1) { @@ -1437,8 +1473,10 @@ int denali_init(struct denali_nand_info *denali) denali_hw_init(denali); denali_drv_init(denali); - /* denali_isr register is done after all the hardware - * initilization is finished*/ + /* + * denali_isr register is done after all the hardware + * initilization is finished + */ if (request_irq(denali->irq, denali_isr, IRQF_SHARED, DENALI_NAND_NAME, denali)) { pr_err("Spectra: Unable to allocate IRQ\n"); @@ -1457,9 +1495,11 @@ int denali_init(struct denali_nand_info *denali) denali->nand.read_byte = denali_read_byte; denali->nand.waitfunc = denali_waitfunc; - /* scan for NAND devices attached to the controller + /* + * scan for NAND devices attached to the controller * this is the first stage in a two step process to register - * with the nand subsystem */ + * with the nand subsystem + */ if (nand_scan_ident(&denali->mtd, denali->max_banks, NULL)) { ret = -ENXIO; goto failed_req_irq; @@ -1491,10 +1531,10 @@ int denali_init(struct denali_nand_info *denali) goto failed_req_irq; } - /* support for multi nand - * MTD known nothing about multi nand, - * so we should tell it the real pagesize - * and anything necessery + /* + * support for multi nand + * MTD known nothing about multi nand, so we should tell it + * the real pagesize and anything necessery */ denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED); denali->nand.chipsize <<= (denali->devnum - 1); @@ -1510,9 +1550,11 @@ int denali_init(struct denali_nand_info *denali) denali->mtd.size = denali->nand.numchips * denali->nand.chipsize; denali->bbtskipbytes *= denali->devnum; - /* second stage of the NAND scan + /* + * second stage of the NAND scan * this stage requires information regarding ECC and - * bad block management. */ + * bad block management. + */ /* Bad block management */ denali->nand.bbt_td = &bbt_main_descr; @@ -1523,7 +1565,8 @@ int denali_init(struct denali_nand_info *denali) denali->nand.options |= NAND_SKIP_BBTSCAN; denali->nand.ecc.mode = NAND_ECC_HW_SYNDROME; - /* Denali Controller only support 15bit and 8bit ECC in MRST, + /* + * Denali Controller only support 15bit and 8bit ECC in MRST, * so just let controller do 15bit ECC for MLC and 8bit ECC for * SLC if possible. * */ @@ -1539,8 +1582,7 @@ int denali_init(struct denali_nand_info *denali) } else if (denali->mtd.oobsize < (denali->bbtskipbytes + ECC_8BITS * (denali->mtd.writesize / ECC_SECTOR_SIZE))) { - pr_err("Your NAND chip OOB is not large enough to \ - contain 8bit ECC correction codes"); + pr_err("Your NAND chip OOB is not large enough to contain 8bit ECC correction codes"); goto failed_req_irq; } else { denali->nand.ecc.strength = 8; @@ -1559,18 +1601,19 @@ int denali_init(struct denali_nand_info *denali) denali->mtd.oobsize - denali->nand.ecc.layout->eccbytes - denali->bbtskipbytes; - /* Let driver know the total blocks number and - * how many blocks contained by each nand chip. - * blksperchip will help driver to know how many - * blocks is taken by FW. - * */ - denali->totalblks = denali->mtd.size >> - denali->nand.phys_erase_shift; + /* + * Let driver know the total blocks number and how many blocks + * contained by each nand chip. blksperchip will help driver to + * know how many blocks is taken by FW. + */ + denali->totalblks = denali->mtd.size >> denali->nand.phys_erase_shift; denali->blksperchip = denali->totalblks / denali->nand.numchips; - /* These functions are required by the NAND core framework, otherwise, + /* + * These functions are required by the NAND core framework, otherwise, * the NAND core will assert. However, we don't need them, so we'll stub - * them out. */ + * them out. + */ denali->nand.ecc.calculate = denali_ecc_calculate; denali->nand.ecc.correct = denali_ecc_correct; denali->nand.ecc.hwctl = denali_ecc_hwctl; @@ -1610,7 +1653,7 @@ void denali_remove(struct denali_nand_info *denali) { denali_irq_cleanup(denali->irq, denali); dma_unmap_single(denali->dev, denali->buf.dma_buf, - denali->mtd.writesize + denali->mtd.oobsize, - DMA_BIDIRECTIONAL); + denali->mtd.writesize + denali->mtd.oobsize, + DMA_BIDIRECTIONAL); } EXPORT_SYMBOL(denali_remove); diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h index 96681746242..145bf88930e 100644 --- a/drivers/mtd/nand/denali.h +++ b/drivers/mtd/nand/denali.h @@ -17,6 +17,9 @@ * */ +#ifndef __DENALI_H__ +#define __DENALI_H__ + #include <linux/mtd/nand.h> #define DEVICE_RESET 0x0 @@ -400,28 +403,6 @@ #define ONFI_BLOOM_TIME 1 #define MODE5_WORKAROUND 0 -/* lld_nand.h */ -/* - * NAND Flash Controller Device Driver - * Copyright (c) 2009, Intel Corporation and its suppliers. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#ifndef _LLD_NAND_ -#define _LLD_NAND_ #define MODE_00 0x00000000 #define MODE_01 0x04000000 @@ -499,4 +480,4 @@ struct denali_nand_info { extern int denali_init(struct denali_nand_info *denali); extern void denali_remove(struct denali_nand_info *denali); -#endif /*_LLD_NAND_*/ +#endif /* __DENALI_H__ */ diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d8cdf06343f..5b5c6271281 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -982,6 +982,15 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) chip->select_chip(mtd, chipnr); + /* + * Reset the chip. + * If we want to check the WP through READ STATUS and check the bit 7 + * we must reset the chip + * some operation can also clear the bit 7 of status register + * eg. erase/program a locked block + */ + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + /* Check, if it is write protected */ if (nand_check_wp(mtd)) { pr_debug("%s: device is write protected!\n", @@ -1032,6 +1041,15 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) chip->select_chip(mtd, chipnr); + /* + * Reset the chip. + * If we want to check the WP through READ STATUS and check the bit 7 + * we must reset the chip + * some operation can also clear the bit 7 of status register + * eg. erase/program a locked block + */ + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + /* Check, if it is write protected */ if (nand_check_wp(mtd)) { pr_debug("%s: device is write protected!\n", @@ -2391,8 +2409,8 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1; /* Invalidate the page cache, when we write to the cached page */ - if (to <= (chip->pagebuf << chip->page_shift) && - (chip->pagebuf << chip->page_shift) < (to + ops->len)) + if (to <= ((loff_t)chip->pagebuf << chip->page_shift) && + ((loff_t)chip->pagebuf << chip->page_shift) < (to + ops->len)) chip->pagebuf = -1; /* Don't allow multipage oob writes with offset */ @@ -3576,6 +3594,8 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip, chip->options |= type->options; chip->ecc_strength_ds = NAND_ECC_STRENGTH(type); chip->ecc_step_ds = NAND_ECC_STEP(type); + chip->onfi_timing_mode_default = + type->onfi_timing_mode_default; *busw = type->options & NAND_BUSWIDTH_16; @@ -3918,8 +3938,7 @@ int nand_scan_tail(struct mtd_info *mtd) case NAND_ECC_HW_OOB_FIRST: /* Similar to NAND_ECC_HW, but a separate read_page handle */ if (!ecc->calculate || !ecc->correct || !ecc->hwctl) { - pr_warn("No ECC functions supplied; " - "hardware ECC not possible\n"); + pr_warn("No ECC functions supplied; hardware ECC not possible\n"); BUG(); } if (!ecc->read_page) @@ -3950,8 +3969,7 @@ int nand_scan_tail(struct mtd_info *mtd) ecc->read_page == nand_read_page_hwecc || !ecc->write_page || ecc->write_page == nand_write_page_hwecc)) { - pr_warn("No ECC functions supplied; " - "hardware ECC not possible\n"); + pr_warn("No ECC functions supplied; hardware ECC not possible\n"); BUG(); } /* Use standard syndrome read/write page function? */ @@ -3975,9 +3993,8 @@ int nand_scan_tail(struct mtd_info *mtd) } break; } - pr_warn("%d byte HW ECC not possible on " - "%d byte page size, fallback to SW ECC\n", - ecc->size, mtd->writesize); + pr_warn("%d byte HW ECC not possible on %d byte page size, fallback to SW ECC\n", + ecc->size, mtd->writesize); ecc->mode = NAND_ECC_SOFT; case NAND_ECC_SOFT: @@ -4030,8 +4047,7 @@ int nand_scan_tail(struct mtd_info *mtd) break; case NAND_ECC_NONE: - pr_warn("NAND_ECC_NONE selected by board driver. " - "This is not recommended!\n"); + pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n"); ecc->read_page = nand_read_page_raw; ecc->write_page = nand_write_page_raw; ecc->read_oob = nand_read_oob_std; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 443fa82cde6..9bb8453d224 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -201,12 +201,12 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num, res = mtd_read(mtd, from, len, &retlen, buf); if (res < 0) { if (mtd_is_eccerr(res)) { - pr_info("nand_bbt: ECC error in BBT at " - "0x%012llx\n", from & ~mtd->writesize); + pr_info("nand_bbt: ECC error in BBT at 0x%012llx\n", + from & ~mtd->writesize); return res; } else if (mtd_is_bitflip(res)) { - pr_info("nand_bbt: corrected error in BBT at " - "0x%012llx\n", from & ~mtd->writesize); + pr_info("nand_bbt: corrected error in BBT at 0x%012llx\n", + from & ~mtd->writesize); ret = res; } else { pr_info("nand_bbt: error reading BBT\n"); @@ -580,8 +580,8 @@ static int search_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr if (td->pages[i] == -1) pr_warn("Bad block table not found for chip %d\n", i); else - pr_info("Bad block table found at page %d, version " - "0x%02X\n", td->pages[i], td->version[i]); + pr_info("Bad block table found at page %d, version 0x%02X\n", + td->pages[i], td->version[i]); } return 0; } @@ -725,12 +725,10 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, res = mtd_read(mtd, to, len, &retlen, buf); if (res < 0) { if (retlen != len) { - pr_info("nand_bbt: error reading block " - "for writing the bad block table\n"); + pr_info("nand_bbt: error reading block for writing the bad block table\n"); return res; } - pr_warn("nand_bbt: ECC error while reading " - "block for writing bad block table\n"); + pr_warn("nand_bbt: ECC error while reading block for writing bad block table\n"); } /* Read oob data */ ops.ooblen = (len >> this->page_shift) * mtd->oobsize; @@ -1338,9 +1336,8 @@ int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt) block = (int)(offs >> this->bbt_erase_shift); res = bbt_get_entry(this, block); - pr_debug("nand_isbad_bbt(): bbt info for offs 0x%08x: " - "(block %d) 0x%02x\n", - (unsigned int)offs, block, res); + pr_debug("nand_isbad_bbt(): bbt info for offs 0x%08x: (block %d) 0x%02x\n", + (unsigned int)offs, block, res); switch (res) { case BBT_BLOCK_GOOD: diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index 3d7c89fc103..fbde8910524 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -46,6 +46,10 @@ struct nand_flash_dev nand_flash_ids[] = { {"SDTNRGAMA 64G 3.3V 8-bit", { .id = {0x45, 0xde, 0x94, 0x93, 0x76, 0x50} }, SZ_16K, SZ_8K, SZ_4M, 0, 6, 1280, NAND_ECC_INFO(40, SZ_1K) }, + {"H27UCG8T2ATR-BC 64G 3.3V 8-bit", + { .id = {0xad, 0xde, 0x94, 0xda, 0x74, 0xc4} }, + SZ_8K, SZ_8K, SZ_2M, 0, 6, 640, NAND_ECC_INFO(40, SZ_1K), + 4 }, LEGACY_ID_NAND("NAND 4MiB 5V 8-bit", 0x6B, 4, SZ_8K, SP_OPTIONS), LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS), diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index 8b36253420f..e81470a8ac6 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -42,7 +42,7 @@ static const struct nand_sdr_timings onfi_sdr_timings[] = { .tRHZ_max = 200000, .tRLOH_min = 0, .tRP_min = 50000, - .tRST_max = 250000000000, + .tRST_max = 250000000000ULL, .tWB_max = 200000, .tRR_min = 40000, .tWC_min = 100000, diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 4f0d83648e5..7dc1dd28d89 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -827,7 +827,7 @@ static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd) NS_ERR("invalid badblocks.\n"); return -EINVAL; } - offset = erase_block_no * ns->geom.secsz; + offset = (loff_t)erase_block_no * ns->geom.secsz; if (mtd_block_markbad(mtd, offset)) { NS_ERR("invalid badblocks.\n"); return -EINVAL; diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c index 69eaba690a9..253a644da76 100644 --- a/drivers/mtd/nand/ndfc.c +++ b/drivers/mtd/nand/ndfc.c @@ -203,7 +203,8 @@ static int ndfc_probe(struct platform_device *ofdev) struct ndfc_controller *ndfc; const __be32 *reg; u32 ccr; - int err, len, cs; + u32 cs; + int err, len; /* Read the reg property to get the chip select */ reg = of_get_property(ofdev->dev.of_node, "reg", &len); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 5967b385141..3b357e920a0 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -136,7 +136,6 @@ #define BADBLOCK_MARKER_LENGTH 2 -#ifdef CONFIG_MTD_NAND_OMAP_BCH static u_char bch16_vector[] = {0xf5, 0x24, 0x1c, 0xd0, 0x61, 0xb3, 0xf1, 0x55, 0x2e, 0x2c, 0x86, 0xa3, 0xed, 0x36, 0x1b, 0x78, 0x48, 0x76, 0xa9, 0x3b, 0x97, 0xd1, 0x7a, 0x93, @@ -144,7 +143,6 @@ static u_char bch16_vector[] = {0xf5, 0x24, 0x1c, 0xd0, 0x61, 0xb3, 0xf1, 0x55, static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, 0xac, 0x6b, 0xff, 0x99, 0x7b}; static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; -#endif /* oob info generated runtime depending on ecc algorithm and layout selected */ static struct nand_ecclayout omap_oobinfo; @@ -1292,7 +1290,6 @@ static int __maybe_unused omap_calculate_ecc_bch(struct mtd_info *mtd, return 0; } -#ifdef CONFIG_MTD_NAND_OMAP_BCH /** * erased_sector_bitflips - count bit flips * @data: data sector buffer @@ -1378,7 +1375,7 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, erased_ecc_vec = bch16_vector; break; default: - pr_err("invalid driver configuration\n"); + dev_err(&info->pdev->dev, "invalid driver configuration\n"); return -EINVAL; } @@ -1449,7 +1446,8 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, err = 0; for (i = 0; i < eccsteps; i++) { if (err_vec[i].error_uncorrectable) { - pr_err("nand: uncorrectable bit-flips found\n"); + dev_err(&info->pdev->dev, + "uncorrectable bit-flips found\n"); err = -EBADMSG; } else if (err_vec[i].error_reported) { for (j = 0; j < err_vec[i].error_count; j++) { @@ -1486,8 +1484,9 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, 1 << bit_pos; } } else { - pr_err("invalid bit-flip @ %d:%d\n", - byte_pos, bit_pos); + dev_err(&info->pdev->dev, + "invalid bit-flip @ %d:%d\n", + byte_pos, bit_pos); err = -EBADMSG; } } @@ -1593,33 +1592,71 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, /** * is_elm_present - checks for presence of ELM module by scanning DT nodes * @omap_nand_info: NAND device structure containing platform data - * @bch_type: 0x0=BCH4, 0x1=BCH8, 0x2=BCH16 */ -static int is_elm_present(struct omap_nand_info *info, - struct device_node *elm_node, enum bch_ecc bch_type) +static bool is_elm_present(struct omap_nand_info *info, + struct device_node *elm_node) { struct platform_device *pdev; - struct nand_ecc_ctrl *ecc = &info->nand.ecc; - int err; + /* check whether elm-id is passed via DT */ if (!elm_node) { - pr_err("nand: error: ELM DT node not found\n"); - return -ENODEV; + dev_err(&info->pdev->dev, "ELM devicetree node not found\n"); + return false; } pdev = of_find_device_by_node(elm_node); /* check whether ELM device is registered */ if (!pdev) { - pr_err("nand: error: ELM device not found\n"); - return -ENODEV; + dev_err(&info->pdev->dev, "ELM device not found\n"); + return false; } /* ELM module available, now configure it */ info->elm_dev = &pdev->dev; - err = elm_config(info->elm_dev, bch_type, - (info->mtd.writesize / ecc->size), ecc->size, ecc->bytes); + return true; +} - return err; +static bool omap2_nand_ecc_check(struct omap_nand_info *info, + struct omap_nand_platform_data *pdata) +{ + bool ecc_needs_bch, ecc_needs_omap_bch, ecc_needs_elm; + + switch (info->ecc_opt) { + case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW: + case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW: + ecc_needs_omap_bch = false; + ecc_needs_bch = true; + ecc_needs_elm = false; + break; + case OMAP_ECC_BCH4_CODE_HW: + case OMAP_ECC_BCH8_CODE_HW: + case OMAP_ECC_BCH16_CODE_HW: + ecc_needs_omap_bch = true; + ecc_needs_bch = false; + ecc_needs_elm = true; + break; + default: + ecc_needs_omap_bch = false; + ecc_needs_bch = false; + ecc_needs_elm = false; + break; + } + + if (ecc_needs_bch && !IS_ENABLED(CONFIG_MTD_NAND_ECC_BCH)) { + dev_err(&info->pdev->dev, + "CONFIG_MTD_NAND_ECC_BCH not enabled\n"); + return false; + } + if (ecc_needs_omap_bch && !IS_ENABLED(CONFIG_MTD_NAND_OMAP_BCH)) { + dev_err(&info->pdev->dev, + "CONFIG_MTD_NAND_OMAP_BCH not enabled\n"); + return false; + } + if (ecc_needs_elm && !is_elm_present(info, pdata->elm_of_node)) { + dev_err(&info->pdev->dev, "ELM not available\n"); + return false; + } + + return true; } -#endif /* CONFIG_MTD_NAND_ECC_BCH */ static int omap_nand_probe(struct platform_device *pdev) { @@ -1663,7 +1700,6 @@ static int omap_nand_probe(struct platform_device *pdev) mtd->owner = THIS_MODULE; nand_chip = &info->nand; nand_chip->ecc.priv = NULL; - nand_chip->options |= NAND_SKIP_BBTSCAN; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res); @@ -1692,17 +1728,22 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->chip_delay = 50; } + if (pdata->flash_bbt) + nand_chip->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB; + else + nand_chip->options |= NAND_SKIP_BBTSCAN; + /* scan NAND device connected to chip controller */ nand_chip->options |= pdata->devsize & NAND_BUSWIDTH_16; if (nand_scan_ident(mtd, 1, NULL)) { - pr_err("nand device scan failed, may be bus-width mismatch\n"); + dev_err(&info->pdev->dev, "scan failed, may be bus-width mismatch\n"); err = -ENXIO; goto return_error; } /* check for small page devices */ if ((mtd->oobsize < 64) && (pdata->ecc_opt != OMAP_ECC_HAM1_CODE_HW)) { - pr_err("small page devices are not supported\n"); + dev_err(&info->pdev->dev, "small page devices are not supported\n"); err = -EINVAL; goto return_error; } @@ -1793,6 +1834,11 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } + if (!omap2_nand_ecc_check(info, pdata)) { + err = -EINVAL; + goto return_error; + } + /* populate MTD interface based on ECC scheme */ ecclayout = &omap_oobinfo; switch (info->ecc_opt) { @@ -1825,7 +1871,6 @@ static int omap_nand_probe(struct platform_device *pdev) break; case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW: -#ifdef CONFIG_MTD_NAND_ECC_BCH pr_info("nand: using OMAP_ECC_BCH4_CODE_HW_DETECTION_SW\n"); nand_chip->ecc.mode = NAND_ECC_HW; nand_chip->ecc.size = 512; @@ -1853,18 +1898,13 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.bytes, &ecclayout); if (!nand_chip->ecc.priv) { - pr_err("nand: error: unable to use s/w BCH library\n"); + dev_err(&info->pdev->dev, "unable to use BCH library\n"); err = -EINVAL; + goto return_error; } break; -#else - pr_err("nand: error: CONFIG_MTD_NAND_ECC_BCH not enabled\n"); - err = -EINVAL; - goto return_error; -#endif case OMAP_ECC_BCH4_CODE_HW: -#ifdef CONFIG_MTD_NAND_OMAP_BCH pr_info("nand: using OMAP_ECC_BCH4_CODE_HW ECC scheme\n"); nand_chip->ecc.mode = NAND_ECC_HW; nand_chip->ecc.size = 512; @@ -1886,21 +1926,15 @@ static int omap_nand_probe(struct platform_device *pdev) /* reserved marker already included in ecclayout->eccbytes */ ecclayout->oobfree->offset = ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; - /* This ECC scheme requires ELM H/W block */ - if (is_elm_present(info, pdata->elm_of_node, BCH4_ECC) < 0) { - pr_err("nand: error: could not initialize ELM\n"); - err = -ENODEV; + + err = elm_config(info->elm_dev, BCH4_ECC, + info->mtd.writesize / nand_chip->ecc.size, + nand_chip->ecc.size, nand_chip->ecc.bytes); + if (err < 0) goto return_error; - } break; -#else - pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n"); - err = -EINVAL; - goto return_error; -#endif case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW: -#ifdef CONFIG_MTD_NAND_ECC_BCH pr_info("nand: using OMAP_ECC_BCH8_CODE_HW_DETECTION_SW\n"); nand_chip->ecc.mode = NAND_ECC_HW; nand_chip->ecc.size = 512; @@ -1928,19 +1962,13 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.bytes, &ecclayout); if (!nand_chip->ecc.priv) { - pr_err("nand: error: unable to use s/w BCH library\n"); + dev_err(&info->pdev->dev, "unable to use BCH library\n"); err = -EINVAL; goto return_error; } break; -#else - pr_err("nand: error: CONFIG_MTD_NAND_ECC_BCH not enabled\n"); - err = -EINVAL; - goto return_error; -#endif case OMAP_ECC_BCH8_CODE_HW: -#ifdef CONFIG_MTD_NAND_OMAP_BCH pr_info("nand: using OMAP_ECC_BCH8_CODE_HW ECC scheme\n"); nand_chip->ecc.mode = NAND_ECC_HW; nand_chip->ecc.size = 512; @@ -1952,12 +1980,13 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.calculate = omap_calculate_ecc_bch; nand_chip->ecc.read_page = omap_read_page_bch; nand_chip->ecc.write_page = omap_write_page_bch; - /* This ECC scheme requires ELM H/W block */ - err = is_elm_present(info, pdata->elm_of_node, BCH8_ECC); - if (err < 0) { - pr_err("nand: error: could not initialize ELM\n"); + + err = elm_config(info->elm_dev, BCH8_ECC, + info->mtd.writesize / nand_chip->ecc.size, + nand_chip->ecc.size, nand_chip->ecc.bytes); + if (err < 0) goto return_error; - } + /* define ECC layout */ ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / @@ -1969,14 +1998,8 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->oobfree->offset = ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; break; -#else - pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n"); - err = -EINVAL; - goto return_error; -#endif case OMAP_ECC_BCH16_CODE_HW: -#ifdef CONFIG_MTD_NAND_OMAP_BCH pr_info("using OMAP_ECC_BCH16_CODE_HW ECC scheme\n"); nand_chip->ecc.mode = NAND_ECC_HW; nand_chip->ecc.size = 512; @@ -1987,12 +2010,13 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.calculate = omap_calculate_ecc_bch; nand_chip->ecc.read_page = omap_read_page_bch; nand_chip->ecc.write_page = omap_write_page_bch; - /* This ECC scheme requires ELM H/W block */ - err = is_elm_present(info, pdata->elm_of_node, BCH16_ECC); - if (err < 0) { - pr_err("ELM is required for this ECC scheme\n"); + + err = elm_config(info->elm_dev, BCH16_ECC, + info->mtd.writesize / nand_chip->ecc.size, + nand_chip->ecc.size, nand_chip->ecc.bytes); + if (err < 0) goto return_error; - } + /* define ECC layout */ ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / @@ -2004,13 +2028,8 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->oobfree->offset = ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; break; -#else - pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n"); - err = -EINVAL; - goto return_error; -#endif default: - pr_err("nand: error: invalid or unsupported ECC scheme\n"); + dev_err(&info->pdev->dev, "invalid or unsupported ECC scheme\n"); err = -EINVAL; goto return_error; } @@ -2022,8 +2041,9 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset; /* check if NAND device's OOB is enough to store ECC signatures */ if (mtd->oobsize < (ecclayout->eccbytes + BADBLOCK_MARKER_LENGTH)) { - pr_err("not enough OOB bytes required = %d, available=%d\n", - ecclayout->eccbytes, mtd->oobsize); + dev_err(&info->pdev->dev, + "not enough OOB bytes required = %d, available=%d\n", + ecclayout->eccbytes, mtd->oobsize); err = -EINVAL; goto return_error; } diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/nand/omap_elm.c index b4f61c7fc16..b4f61c7fc16 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/nand/omap_elm.c diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index 00f4a83359b..d3e028e58b0 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -18,7 +18,7 @@ struct sm_oob { uint8_t ecc2[3]; uint8_t lba_copy2[2]; uint8_t ecc1[3]; -} __attribute__((packed)); +} __packed; /* one sector is always 512 bytes, but it can consist of two nand pages */ diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index cf49c22673b..c23184a47fc 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1058,7 +1058,7 @@ static int sm_write(struct mtd_blktrans_dev *dev, { struct sm_ftl *ftl = dev->priv; struct ftl_zone *zone; - int error, zone_num, block, boffset; + int error = 0, zone_num, block, boffset; BUG_ON(ftl->readonly); sm_break_offset(ftl, sec_no << 9, &zone_num, &block, &boffset); diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig index f8acfa4310e..64a4f0edabc 100644 --- a/drivers/mtd/spi-nor/Kconfig +++ b/drivers/mtd/spi-nor/Kconfig @@ -7,6 +7,20 @@ menuconfig MTD_SPI_NOR if MTD_SPI_NOR +config MTD_SPI_NOR_USE_4K_SECTORS + bool "Use small 4096 B erase sectors" + default y + help + Many flash memories support erasing small (4096 B) sectors. Depending + on the usage this feature may provide performance gain in comparison + to erasing whole blocks (32/64 KiB). + Changing a small part of the flash's contents is usually faster with + small sectors. On the other hand erasing should be faster when using + 64 KiB block instead of 16 × 4 KiB sectors. + + Please note that some tools/drivers/filesystems may not work with + 4096 B erase size (e.g. UBIFS requires 15 KiB as a minimum). + config SPI_FSL_QUADSPI tristate "Freescale Quad SPI controller" depends on ARCH_MXC diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index b5ad6bebf5e..ae16aa2f688 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -611,6 +611,7 @@ const struct spi_device_id spi_nor_ids[] = { { "m25px32-s0", INFO(0x207316, 0, 64 * 1024, 64, SECT_4K) }, { "m25px32-s1", INFO(0x206316, 0, 64 * 1024, 64, SECT_4K) }, { "m25px64", INFO(0x207117, 0, 64 * 1024, 128, 0) }, + { "m25px80", INFO(0x207114, 0, 64 * 1024, 16, 0) }, /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */ { "w25x10", INFO(0xef3011, 0, 64 * 1024, 2, SECT_4K) }, @@ -623,7 +624,6 @@ const struct spi_device_id spi_nor_ids[] = { { "w25q32dw", INFO(0xef6016, 0, 64 * 1024, 64, SECT_4K) }, { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) }, { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, - { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, { "w25q80", INFO(0xef5014, 0, 64 * 1024, 16, SECT_4K) }, { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, @@ -671,11 +671,6 @@ static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) return ERR_PTR(-ENODEV); } -static const struct spi_device_id *jedec_probe(struct spi_nor *nor) -{ - return nor->read_id(nor); -} - static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { @@ -920,7 +915,6 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, enum read_mode mode) { struct flash_info *info; - struct flash_platform_data *data; struct device *dev = nor->dev; struct mtd_info *mtd = nor->mtd; struct device_node *np = dev->of_node; @@ -931,34 +925,12 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, if (ret) return ret; - /* Platform data helps sort out which chip type we have, as - * well as how this board partitions it. If we don't have - * a chip ID, try the JEDEC id commands; they'll work for most - * newer chips, even if we don't recognize the particular chip. - */ - data = dev_get_platdata(dev); - if (data && data->type) { - const struct spi_device_id *plat_id; - - for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) { - plat_id = &spi_nor_ids[i]; - if (strcmp(data->type, plat_id->name)) - continue; - break; - } - - if (i < ARRAY_SIZE(spi_nor_ids) - 1) - id = plat_id; - else - dev_warn(dev, "unrecognized id %s\n", data->type); - } - info = (void *)id->driver_data; if (info->jedec_id) { const struct spi_device_id *jid; - jid = jedec_probe(nor); + jid = nor->read_id(nor); if (IS_ERR(jid)) { return PTR_ERR(jid); } else if (jid != id) { @@ -990,11 +962,8 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, write_sr(nor, 0); } - if (data && data->name) - mtd->name = data->name; - else + if (!mtd->name) mtd->name = dev_name(dev); - mtd->type = MTD_NORFLASH; mtd->writesize = 1; mtd->flags = MTD_CAP_NORFLASH; @@ -1018,6 +987,7 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, nor->wait_till_ready == spi_nor_wait_till_ready) nor->wait_till_ready = spi_nor_wait_till_fsr_ready; +#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS /* prefer "small sector" erase if possible */ if (info->flags & SECT_4K) { nor->erase_opcode = SPINOR_OP_BE_4K; @@ -1025,7 +995,9 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, } else if (info->flags & SECT_4K_PMC) { nor->erase_opcode = SPINOR_OP_BE_4K_PMC; mtd->erasesize = 4096; - } else { + } else +#endif + { nor->erase_opcode = SPINOR_OP_SE; mtd->erasesize = info->sector_size; } diff --git a/drivers/mtd/tests/mtd_test.c b/drivers/mtd/tests/mtd_test.c index 111ee46a742..34736bbcc07 100644 --- a/drivers/mtd/tests/mtd_test.c +++ b/drivers/mtd/tests/mtd_test.c @@ -10,7 +10,7 @@ int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum) { int err; struct erase_info ei; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(&ei, 0, sizeof(struct erase_info)); ei.mtd = mtd; @@ -33,7 +33,7 @@ int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum) static int is_block_bad(struct mtd_info *mtd, unsigned int ebnum) { int ret; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; ret = mtd_block_isbad(mtd, addr); if (ret) diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c index 6f976159611..273f7e55395 100644 --- a/drivers/mtd/tests/nandbiterrs.c +++ b/drivers/mtd/tests/nandbiterrs.c @@ -364,7 +364,7 @@ static int __init mtd_nandbiterrs_init(void) pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize); - offset = page_offset * mtd->writesize; + offset = (loff_t)page_offset * mtd->writesize; eraseblock = mtd_div_by_eb(offset, mtd); pr_info("Using page=%u, offset=%llu, eraseblock=%u\n", diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index f19ab1acde1..dc4f9602b97 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -120,7 +120,7 @@ static int verify_eraseblock(int ebnum) int i; struct mtd_oob_ops ops; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt); for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { @@ -214,7 +214,7 @@ static int verify_eraseblock_in_one_go(int ebnum) { struct mtd_oob_ops ops; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; prandom_bytes_state(&rnd_state, writebuf, len); @@ -568,7 +568,7 @@ static int __init mtd_oobtest_init(void) size_t sz = mtd->ecclayout->oobavail; if (bbt[i] || bbt[i + 1]) continue; - addr = (i + 1) * mtd->erasesize - mtd->writesize; + addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize; prandom_bytes_state(&rnd_state, writebuf, sz * cnt); for (pg = 0; pg < cnt; ++pg) { ops.mode = MTD_OPS_AUTO_OOB; @@ -598,7 +598,7 @@ static int __init mtd_oobtest_init(void) continue; prandom_bytes_state(&rnd_state, writebuf, mtd->ecclayout->oobavail * 2); - addr = (i + 1) * mtd->erasesize - mtd->writesize; + addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; diff --git a/drivers/mtd/tests/pagetest.c b/drivers/mtd/tests/pagetest.c index ed2d3f656fd..88296e888e9 100644 --- a/drivers/mtd/tests/pagetest.c +++ b/drivers/mtd/tests/pagetest.c @@ -52,7 +52,7 @@ static struct rnd_state rnd_state; static int write_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); cond_resched(); @@ -64,7 +64,7 @@ static int verify_eraseblock(int ebnum) uint32_t j; int err = 0, i; loff_t addr0, addrn; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; addr0 = 0; for (i = 0; i < ebcnt && bbt[i]; ++i) diff --git a/drivers/mtd/tests/readtest.c b/drivers/mtd/tests/readtest.c index 626e66d0f7e..a54cf151111 100644 --- a/drivers/mtd/tests/readtest.c +++ b/drivers/mtd/tests/readtest.c @@ -47,7 +47,7 @@ static int pgcnt; static int read_eraseblock_by_page(int ebnum) { int i, ret, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; void *oobbuf = iobuf1; diff --git a/drivers/mtd/tests/speedtest.c b/drivers/mtd/tests/speedtest.c index 87ff6a29f84..5ee9f702102 100644 --- a/drivers/mtd/tests/speedtest.c +++ b/drivers/mtd/tests/speedtest.c @@ -55,7 +55,7 @@ static int multiblock_erase(int ebnum, int blocks) { int err; struct erase_info ei; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(&ei, 0, sizeof(struct erase_info)); ei.mtd = mtd; @@ -80,7 +80,7 @@ static int multiblock_erase(int ebnum, int blocks) static int write_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; return mtdtest_write(mtd, addr, mtd->erasesize, iobuf); } @@ -88,7 +88,7 @@ static int write_eraseblock(int ebnum) static int write_eraseblock_by_page(int ebnum) { int i, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < pgcnt; i++) { @@ -106,7 +106,7 @@ static int write_eraseblock_by_2pages(int ebnum) { size_t sz = pgsize * 2; int i, n = pgcnt / 2, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < n; i++) { @@ -124,7 +124,7 @@ static int write_eraseblock_by_2pages(int ebnum) static int read_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; return mtdtest_read(mtd, addr, mtd->erasesize, iobuf); } @@ -132,7 +132,7 @@ static int read_eraseblock(int ebnum) static int read_eraseblock_by_page(int ebnum) { int i, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < pgcnt; i++) { @@ -150,7 +150,7 @@ static int read_eraseblock_by_2pages(int ebnum) { size_t sz = pgsize * 2; int i, n = pgcnt / 2, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < n; i++) { diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c index a876371ad41..7b59ef522d5 100644 --- a/drivers/mtd/tests/subpagetest.c +++ b/drivers/mtd/tests/subpagetest.c @@ -57,7 +57,7 @@ static int write_eraseblock(int ebnum) { size_t written; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); @@ -92,7 +92,7 @@ static int write_eraseblock2(int ebnum) { size_t written; int err = 0, k; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) @@ -131,7 +131,7 @@ static int verify_eraseblock(int ebnum) { size_t read; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); @@ -192,7 +192,7 @@ static int verify_eraseblock2(int ebnum) { size_t read; int err = 0, k; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) @@ -227,7 +227,7 @@ static int verify_eraseblock_ff(int ebnum) uint32_t j; size_t read; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(writebuf, 0xff, subpgsize); for (j = 0; j < mtd->erasesize / subpgsize; ++j) { diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 776e965dc9f..05b0ca3bf71 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -21,8 +21,12 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - return mdiobus_read(to_mii_bus(ds->master_dev), - ds->pd->sw_addr + addr, reg); + struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); + + if (bus == NULL) + return -EINVAL; + + return mdiobus_read(bus, ds->pd->sw_addr + addr, reg); } #define REG_READ(addr, reg) \ @@ -38,8 +42,12 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - return mdiobus_write(to_mii_bus(ds->master_dev), - ds->pd->sw_addr + addr, reg, val); + struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); + + if (bus == NULL) + return -EINVAL; + + return mdiobus_write(bus, ds->pd->sw_addr + addr, reg, val); } #define REG_WRITE(addr, reg, val) \ diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 6365e30138a..1020a7af67c 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -206,7 +206,7 @@ static int mv88e6171_setup_port(struct dsa_switch *ds, int p) */ val = 0x0433; if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == htons(ETH_P_EDSA)) + if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) val |= 0x3300; else val |= 0x0100; diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index d6f6428b27d..a6c90cf5634 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -75,11 +75,14 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); int ret; + if (bus == NULL) + return -EINVAL; + mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_read(to_mii_bus(ds->master_dev), - ds->pd->sw_addr, addr, reg); + ret = __mv88e6xxx_reg_read(bus, ds->pd->sw_addr, addr, reg); mutex_unlock(&ps->smi_mutex); return ret; @@ -119,11 +122,14 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); int ret; + if (bus == NULL) + return -EINVAL; + mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_write(to_mii_bus(ds->master_dev), - ds->pd->sw_addr, addr, reg, val); + ret = __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val); mutex_unlock(&ps->smi_mutex); return ret; diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile index 589b3524771..68be565548c 100644 --- a/drivers/net/ethernet/apm/xgene/Makefile +++ b/drivers/net/ethernet/apm/xgene/Makefile @@ -2,6 +2,6 @@ # Makefile for APM X-Gene Ethernet Driver. # -xgene-enet-objs := xgene_enet_hw.o xgene_enet_xgmac.o \ +xgene-enet-objs := xgene_enet_hw.o xgene_enet_sgmac.o xgene_enet_xgmac.o \ xgene_enet_main.o xgene_enet_ethtool.o obj-$(CONFIG_NET_XGENE) += xgene-enet.o diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index c1c997b9234..416d6ebfc2c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -64,16 +64,25 @@ static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) return -ENODEV; return phy_ethtool_gset(phydev, cmd); + } else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { + cmd->supported = SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_MII; + cmd->advertising = cmd->supported; + ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_MII; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_ENABLE; + } else { + cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; + cmd->advertising = cmd->supported; + ethtool_cmd_speed_set(cmd, SPEED_10000); + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; } - cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_10000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_EXTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - return 0; } diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c8f3824f760..63ea1941e97 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -410,7 +410,6 @@ static void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata) addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | (dev_addr[1] << 8) | dev_addr[0]; addr1 = (dev_addr[5] << 24) | (dev_addr[4] << 16); - addr1 |= pdata->phy_addr & 0xFFFF; xgene_enet_wr_mcx_mac(pdata, STATION_ADDR0_ADDR, addr0); xgene_enet_wr_mcx_mac(pdata, STATION_ADDR1_ADDR, addr1); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 15ec4267779..38558584080 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -44,6 +44,7 @@ static inline u32 xgene_get_bits(u32 val, u32 start, u32 end) enum xgene_enet_rm { RM0, + RM1, RM3 = 3 }; @@ -143,6 +144,8 @@ enum xgene_enet_rm { #define CFG_CLE_FPSEL0_SET(dst, val) xgene_set_bits(dst, val, 16, 4) #define CFG_MACMODE_SET(dst, val) xgene_set_bits(dst, val, 18, 2) #define CFG_WAITASYNCRD_SET(dst, val) xgene_set_bits(dst, val, 0, 16) +#define CFG_CLE_DSTQID0(val) (val & GENMASK(11, 0)) +#define CFG_CLE_FPSEL0(val) ((val << 16) & GENMASK(19, 16)) #define ICM_CONFIG0_REG_0_ADDR 0x0400 #define ICM_CONFIG2_REG_0_ADDR 0x0410 #define RX_DV_GATE_REG_0_ADDR 0x05fc @@ -179,7 +182,6 @@ enum xgene_enet_rm { #define TUND_ADDR 0x4a #define TSO_IPPROTO_TCP 1 -#define FULL_DUPLEX 2 #define USERINFO_POS 0 #define USERINFO_LEN 32 diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 9b85239ceed..3c208cc6f6b 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -21,6 +21,7 @@ #include "xgene_enet_main.h" #include "xgene_enet_hw.h" +#include "xgene_enet_sgmac.h" #include "xgene_enet_xgmac.h" static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) @@ -813,6 +814,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) return pdata->phy_mode; } if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII && + pdata->phy_mode != PHY_INTERFACE_MODE_SGMII && pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) { dev_err(dev, "Incorrect phy-connection-type specified\n"); return -ENODEV; @@ -830,14 +832,13 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET; pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET; pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET; - if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { + if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII || + pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { pdata->mcx_mac_addr = base_addr + BLOCK_ETH_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET; - pdata->rm = RM3; } else { pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET; - pdata->rm = RM0; } pdata->rx_buff_cnt = NUM_PKT_BUF; @@ -881,10 +882,17 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) case PHY_INTERFACE_MODE_RGMII: pdata->mac_ops = &xgene_gmac_ops; pdata->port_ops = &xgene_gport_ops; + pdata->rm = RM3; + break; + case PHY_INTERFACE_MODE_SGMII: + pdata->mac_ops = &xgene_sgmac_ops; + pdata->port_ops = &xgene_sgport_ops; + pdata->rm = RM1; break; default: pdata->mac_ops = &xgene_xgmac_ops; pdata->port_ops = &xgene_xgport_ops; + pdata->rm = RM0; break; } } @@ -895,6 +903,7 @@ static int xgene_enet_probe(struct platform_device *pdev) struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; struct napi_struct *napi; + struct xgene_mac_ops *mac_ops; int ret; ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata)); @@ -937,10 +946,11 @@ static int xgene_enet_probe(struct platform_device *pdev) napi = &pdata->rx_ring->napi; netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + mac_ops = pdata->mac_ops; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) ret = xgene_enet_mdio_config(pdata); else - INIT_DELAYED_WORK(&pdata->link_work, xgene_enet_link_state); + INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); return ret; err: diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 86cf68b6558..874e5a01161 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -39,6 +39,9 @@ #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 +#define PHY_POLL_LINK_ON (10 * HZ) +#define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) + /* software context of a descriptor ring */ struct xgene_enet_desc_ring { struct net_device *ndev; @@ -76,6 +79,7 @@ struct xgene_mac_ops { void (*tx_disable)(struct xgene_enet_pdata *pdata); void (*rx_disable)(struct xgene_enet_pdata *pdata); void (*set_mac_addr)(struct xgene_enet_pdata *pdata); + void (*link_state)(struct work_struct *work); }; struct xgene_port_ops { @@ -109,7 +113,6 @@ struct xgene_enet_pdata { void __iomem *base_addr; void __iomem *ring_csr_addr; void __iomem *ring_cmd_addr; - u32 phy_addr; int phy_mode; enum xgene_enet_rm rm; struct rtnl_link_stats64 stats; @@ -118,6 +121,13 @@ struct xgene_enet_pdata { struct delayed_work link_work; }; +struct xgene_indirect_ctl { + void __iomem *addr; + void __iomem *ctl; + void __iomem *cmd; + void __iomem *cmd_done; +}; + /* Set the specified value into a bit-field defined by its starting position * and length within a single u64. */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c new file mode 100644 index 00000000000..e6d24c21019 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -0,0 +1,389 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" +#include "xgene_enet_hw.h" +#include "xgene_enet_sgmac.h" + +static void xgene_enet_wr_csr(struct xgene_enet_pdata *p, u32 offset, u32 val) +{ + iowrite32(val, p->eth_csr_addr + offset); +} + +static void xgene_enet_wr_ring_if(struct xgene_enet_pdata *p, + u32 offset, u32 val) +{ + iowrite32(val, p->eth_ring_if_addr + offset); +} + +static void xgene_enet_wr_diag_csr(struct xgene_enet_pdata *p, + u32 offset, u32 val) +{ + iowrite32(val, p->eth_diag_csr_addr + offset); +} + +static bool xgene_enet_wr_indirect(struct xgene_indirect_ctl *ctl, + u32 wr_addr, u32 wr_data) +{ + int i; + + iowrite32(wr_addr, ctl->addr); + iowrite32(wr_data, ctl->ctl); + iowrite32(XGENE_ENET_WR_CMD, ctl->cmd); + + /* wait for write command to complete */ + for (i = 0; i < 10; i++) { + if (ioread32(ctl->cmd_done)) { + iowrite32(0, ctl->cmd); + return true; + } + udelay(1); + } + + return false; +} + +static void xgene_enet_wr_mac(struct xgene_enet_pdata *p, + u32 wr_addr, u32 wr_data) +{ + struct xgene_indirect_ctl ctl = { + .addr = p->mcx_mac_addr + MAC_ADDR_REG_OFFSET, + .ctl = p->mcx_mac_addr + MAC_WRITE_REG_OFFSET, + .cmd = p->mcx_mac_addr + MAC_COMMAND_REG_OFFSET, + .cmd_done = p->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET + }; + + if (!xgene_enet_wr_indirect(&ctl, wr_addr, wr_data)) + netdev_err(p->ndev, "mac write failed, addr: %04x\n", wr_addr); +} + +static u32 xgene_enet_rd_csr(struct xgene_enet_pdata *p, u32 offset) +{ + return ioread32(p->eth_csr_addr + offset); +} + +static u32 xgene_enet_rd_diag_csr(struct xgene_enet_pdata *p, u32 offset) +{ + return ioread32(p->eth_diag_csr_addr + offset); +} + +static u32 xgene_enet_rd_indirect(struct xgene_indirect_ctl *ctl, u32 rd_addr) +{ + u32 rd_data; + int i; + + iowrite32(rd_addr, ctl->addr); + iowrite32(XGENE_ENET_RD_CMD, ctl->cmd); + + /* wait for read command to complete */ + for (i = 0; i < 10; i++) { + if (ioread32(ctl->cmd_done)) { + rd_data = ioread32(ctl->ctl); + iowrite32(0, ctl->cmd); + + return rd_data; + } + udelay(1); + } + + pr_err("%s: mac read failed, addr: %04x\n", __func__, rd_addr); + + return 0; +} + +static u32 xgene_enet_rd_mac(struct xgene_enet_pdata *p, u32 rd_addr) +{ + struct xgene_indirect_ctl ctl = { + .addr = p->mcx_mac_addr + MAC_ADDR_REG_OFFSET, + .ctl = p->mcx_mac_addr + MAC_READ_REG_OFFSET, + .cmd = p->mcx_mac_addr + MAC_COMMAND_REG_OFFSET, + .cmd_done = p->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET + }; + + return xgene_enet_rd_indirect(&ctl, rd_addr); +} + +static int xgene_enet_ecc_init(struct xgene_enet_pdata *p) +{ + struct net_device *ndev = p->ndev; + u32 data; + int i; + + xgene_enet_wr_diag_csr(p, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0); + for (i = 0; i < 10 && data != ~0U ; i++) { + usleep_range(100, 110); + data = xgene_enet_rd_diag_csr(p, ENET_BLOCK_MEM_RDY_ADDR); + } + + if (data != ~0U) { + netdev_err(ndev, "Failed to release memory from shutdown\n"); + return -ENODEV; + } + + return 0; +} + +static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p) +{ + u32 val = 0xffffffff; + + xgene_enet_wr_ring_if(p, ENET_CFGSSQMIWQASSOC_ADDR, val); + xgene_enet_wr_ring_if(p, ENET_CFGSSQMIFPQASSOC_ADDR, val); +} + +static void xgene_mii_phy_write(struct xgene_enet_pdata *p, u8 phy_id, + u32 reg, u16 data) +{ + u32 addr, wr_data, done; + int i; + + addr = PHY_ADDR(phy_id) | REG_ADDR(reg); + xgene_enet_wr_mac(p, MII_MGMT_ADDRESS_ADDR, addr); + + wr_data = PHY_CONTROL(data); + xgene_enet_wr_mac(p, MII_MGMT_CONTROL_ADDR, wr_data); + + for (i = 0; i < 10; i++) { + done = xgene_enet_rd_mac(p, MII_MGMT_INDICATORS_ADDR); + if (!(done & BUSY_MASK)) + return; + usleep_range(10, 20); + } + + netdev_err(p->ndev, "MII_MGMT write failed\n"); +} + +static u32 xgene_mii_phy_read(struct xgene_enet_pdata *p, u8 phy_id, u32 reg) +{ + u32 addr, data, done; + int i; + + addr = PHY_ADDR(phy_id) | REG_ADDR(reg); + xgene_enet_wr_mac(p, MII_MGMT_ADDRESS_ADDR, addr); + xgene_enet_wr_mac(p, MII_MGMT_COMMAND_ADDR, READ_CYCLE_MASK); + + for (i = 0; i < 10; i++) { + done = xgene_enet_rd_mac(p, MII_MGMT_INDICATORS_ADDR); + if (!(done & BUSY_MASK)) { + data = xgene_enet_rd_mac(p, MII_MGMT_STATUS_ADDR); + xgene_enet_wr_mac(p, MII_MGMT_COMMAND_ADDR, 0); + + return data; + } + usleep_range(10, 20); + } + + netdev_err(p->ndev, "MII_MGMT read failed\n"); + + return 0; +} + +static void xgene_sgmac_reset(struct xgene_enet_pdata *p) +{ + xgene_enet_wr_mac(p, MAC_CONFIG_1_ADDR, SOFT_RESET1); + xgene_enet_wr_mac(p, MAC_CONFIG_1_ADDR, 0); +} + +static void xgene_sgmac_set_mac_addr(struct xgene_enet_pdata *p) +{ + u32 addr0, addr1; + u8 *dev_addr = p->ndev->dev_addr; + + addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | + (dev_addr[1] << 8) | dev_addr[0]; + xgene_enet_wr_mac(p, STATION_ADDR0_ADDR, addr0); + + addr1 = xgene_enet_rd_mac(p, STATION_ADDR1_ADDR); + addr1 |= (dev_addr[5] << 24) | (dev_addr[4] << 16); + xgene_enet_wr_mac(p, STATION_ADDR1_ADDR, addr1); +} + +static u32 xgene_enet_link_status(struct xgene_enet_pdata *p) +{ + u32 data; + + data = xgene_mii_phy_read(p, INT_PHY_ADDR, + SGMII_BASE_PAGE_ABILITY_ADDR >> 2); + + return data & LINK_UP; +} + +static void xgene_sgmac_init(struct xgene_enet_pdata *p) +{ + u32 data, loop = 10; + + xgene_sgmac_reset(p); + + /* Enable auto-negotiation */ + xgene_mii_phy_write(p, INT_PHY_ADDR, SGMII_CONTROL_ADDR >> 2, 0x1000); + xgene_mii_phy_write(p, INT_PHY_ADDR, SGMII_TBI_CONTROL_ADDR >> 2, 0); + + while (loop--) { + data = xgene_mii_phy_read(p, INT_PHY_ADDR, + SGMII_STATUS_ADDR >> 2); + if ((data & AUTO_NEG_COMPLETE) && (data & LINK_STATUS)) + break; + usleep_range(10, 20); + } + if (!(data & AUTO_NEG_COMPLETE) || !(data & LINK_STATUS)) + netdev_err(p->ndev, "Auto-negotiation failed\n"); + + data = xgene_enet_rd_mac(p, MAC_CONFIG_2_ADDR); + ENET_INTERFACE_MODE2_SET(&data, 2); + xgene_enet_wr_mac(p, MAC_CONFIG_2_ADDR, data | FULL_DUPLEX2); + xgene_enet_wr_mac(p, INTERFACE_CONTROL_ADDR, ENET_GHD_MODE); + + data = xgene_enet_rd_csr(p, ENET_SPARE_CFG_REG_ADDR); + data |= MPA_IDLE_WITH_QMI_EMPTY; + xgene_enet_wr_csr(p, ENET_SPARE_CFG_REG_ADDR, data); + + xgene_sgmac_set_mac_addr(p); + + data = xgene_enet_rd_csr(p, DEBUG_REG_ADDR); + data |= CFG_BYPASS_UNISEC_TX | CFG_BYPASS_UNISEC_RX; + xgene_enet_wr_csr(p, DEBUG_REG_ADDR, data); + + /* Adjust MDC clock frequency */ + data = xgene_enet_rd_mac(p, MII_MGMT_CONFIG_ADDR); + MGMT_CLOCK_SEL_SET(&data, 7); + xgene_enet_wr_mac(p, MII_MGMT_CONFIG_ADDR, data); + + /* Enable drop if bufpool not available */ + data = xgene_enet_rd_csr(p, RSIF_CONFIG_REG_ADDR); + data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; + xgene_enet_wr_csr(p, RSIF_CONFIG_REG_ADDR, data); + + /* Rtype should be copied from FP */ + xgene_enet_wr_csr(p, RSIF_RAM_DBG_REG0_ADDR, 0); + + /* Bypass traffic gating */ + xgene_enet_wr_csr(p, CFG_LINK_AGGR_RESUME_0_ADDR, TX_PORT0); + xgene_enet_wr_csr(p, CFG_BYPASS_ADDR, RESUME_TX); + xgene_enet_wr_csr(p, SG_RX_DV_GATE_REG_0_ADDR, RESUME_RX0); +} + +static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set) +{ + u32 data; + + data = xgene_enet_rd_mac(p, MAC_CONFIG_1_ADDR); + + if (set) + data |= bits; + else + data &= ~bits; + + xgene_enet_wr_mac(p, MAC_CONFIG_1_ADDR, data); +} + +static void xgene_sgmac_rx_enable(struct xgene_enet_pdata *p) +{ + xgene_sgmac_rxtx(p, RX_EN, true); +} + +static void xgene_sgmac_tx_enable(struct xgene_enet_pdata *p) +{ + xgene_sgmac_rxtx(p, TX_EN, true); +} + +static void xgene_sgmac_rx_disable(struct xgene_enet_pdata *p) +{ + xgene_sgmac_rxtx(p, RX_EN, false); +} + +static void xgene_sgmac_tx_disable(struct xgene_enet_pdata *p) +{ + xgene_sgmac_rxtx(p, TX_EN, false); +} + +static void xgene_enet_reset(struct xgene_enet_pdata *p) +{ + clk_prepare_enable(p->clk); + clk_disable_unprepare(p->clk); + clk_prepare_enable(p->clk); + + xgene_enet_ecc_init(p); + xgene_enet_config_ring_if_assoc(p); +} + +static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p, + u32 dst_ring_num, u16 bufpool_id) +{ + u32 data, fpsel; + + data = CFG_CLE_BYPASS_EN0; + xgene_enet_wr_csr(p, CLE_BYPASS_REG0_0_ADDR, data); + + fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20; + data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel); + xgene_enet_wr_csr(p, CLE_BYPASS_REG1_0_ADDR, data); +} + +static void xgene_enet_shutdown(struct xgene_enet_pdata *p) +{ + clk_disable_unprepare(p->clk); +} + +static void xgene_enet_link_state(struct work_struct *work) +{ + struct xgene_enet_pdata *p = container_of(to_delayed_work(work), + struct xgene_enet_pdata, link_work); + struct net_device *ndev = p->ndev; + u32 link, poll_interval; + + link = xgene_enet_link_status(p); + if (link) { + if (!netif_carrier_ok(ndev)) { + netif_carrier_on(ndev); + xgene_sgmac_init(p); + xgene_sgmac_rx_enable(p); + xgene_sgmac_tx_enable(p); + netdev_info(ndev, "Link is Up - 1Gbps\n"); + } + poll_interval = PHY_POLL_LINK_ON; + } else { + if (netif_carrier_ok(ndev)) { + xgene_sgmac_rx_disable(p); + xgene_sgmac_tx_disable(p); + netif_carrier_off(ndev); + netdev_info(ndev, "Link is Down\n"); + } + poll_interval = PHY_POLL_LINK_OFF; + } + + schedule_delayed_work(&p->link_work, poll_interval); +} + +struct xgene_mac_ops xgene_sgmac_ops = { + .init = xgene_sgmac_init, + .reset = xgene_sgmac_reset, + .rx_enable = xgene_sgmac_rx_enable, + .tx_enable = xgene_sgmac_tx_enable, + .rx_disable = xgene_sgmac_rx_disable, + .tx_disable = xgene_sgmac_tx_disable, + .set_mac_addr = xgene_sgmac_set_mac_addr, + .link_state = xgene_enet_link_state +}; + +struct xgene_port_ops xgene_sgport_ops = { + .reset = xgene_enet_reset, + .cle_bypass = xgene_enet_cle_bypass, + .shutdown = xgene_enet_shutdown +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h new file mode 100644 index 00000000000..de432465009 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h @@ -0,0 +1,41 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __XGENE_ENET_SGMAC_H__ +#define __XGENE_ENET_SGMAC_H__ + +#define PHY_ADDR(src) (((src)<<8) & GENMASK(12, 8)) +#define REG_ADDR(src) ((src) & GENMASK(4, 0)) +#define PHY_CONTROL(src) ((src) & GENMASK(15, 0)) +#define INT_PHY_ADDR 0x1e +#define SGMII_TBI_CONTROL_ADDR 0x44 +#define SGMII_CONTROL_ADDR 0x00 +#define SGMII_STATUS_ADDR 0x04 +#define SGMII_BASE_PAGE_ABILITY_ADDR 0x14 +#define AUTO_NEG_COMPLETE BIT(5) +#define LINK_STATUS BIT(2) +#define LINK_UP BIT(15) +#define MPA_IDLE_WITH_QMI_EMPTY BIT(12) +#define SG_RX_DV_GATE_REG_0_ADDR 0x0dfc + +extern struct xgene_mac_ops xgene_sgmac_ops; +extern struct xgene_port_ops xgene_sgport_ops; + +#endif /* __XGENE_ENET_SGMAC_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index cd64b9f18b5..67d07206b3c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -284,7 +284,7 @@ static void xgene_enet_shutdown(struct xgene_enet_pdata *pdata) clk_disable_unprepare(pdata->clk); } -void xgene_enet_link_state(struct work_struct *work) +static void xgene_enet_link_state(struct work_struct *work) { struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work), struct xgene_enet_pdata, link_work); @@ -322,6 +322,7 @@ struct xgene_mac_ops xgene_xgmac_ops = { .rx_disable = xgene_xgmac_rx_disable, .tx_disable = xgene_xgmac_tx_disable, .set_mac_addr = xgene_xgmac_set_mac_addr, + .link_state = xgene_enet_link_state }; struct xgene_port_ops xgene_xgport_ops = { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index d2d59e7ed9a..5a5296a6d1d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -47,10 +47,6 @@ #define XG_ENET_SPARE_CFG_REG_1_ADDR 0x0410 #define XGENET_RX_DV_GATE_REG_0_ADDR 0x0804 -#define PHY_POLL_LINK_ON (10 * HZ) -#define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) - -void xgene_enet_link_state(struct work_struct *work); extern struct xgene_mac_ops xgene_xgmac_ops; extern struct xgene_port_ops xgene_xgport_ops; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ba499489969..dbb41c1923e 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8099,9 +8099,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Sync BD data before updating mailbox */ wmb(); - /* Packets are ready, update Tx producer idx local and on card. */ - tw32_tx_mbox(tnapi->prodmbox, entry); - tnapi->tx_prod = entry; if (unlikely(tg3_tx_avail(tnapi) <= (MAX_SKB_FRAGS + 1))) { netif_tx_stop_queue(txq); @@ -8116,7 +8113,12 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_wake_queue(txq); } - mmiowb(); + if (!skb->xmit_more || netif_xmit_stopped(txq)) { + /* Packets are ready, update Tx producer idx on card. */ + tw32_tx_mbox(tnapi->prodmbox, entry); + mmiowb(); + } + return NETDEV_TX_OK; dma_error: diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 153cafac323..c3861de9dc8 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -552,6 +552,7 @@ bnad_cq_setup_skb_frags(struct bna_rcb *rcb, struct sk_buff *skb, len = (vec == nvecs) ? last_fraglen : unmap->vector.len; + skb->truesize += unmap->vector.len; totlen += len; skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, @@ -563,7 +564,6 @@ bnad_cq_setup_skb_frags(struct bna_rcb *rcb, struct sk_buff *skb, skb->len += totlen; skb->data_len += totlen; - skb->truesize += totlen; } static inline void diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index c3ce9df0041..ac6473f75eb 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -68,7 +68,7 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5 Ethernet support" - depends on PCI + depends on PCI && (IPV6 || IPV6=n) select FW_LOADER select MDIO ---help--- diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 410ed5805a9..3c481b26074 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -986,6 +986,8 @@ static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, int t4_seeprom_wp(struct adapter *adapter, bool enable); int get_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); +int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, + const u8 *fw_data, unsigned int size, int force); unsigned int t4_flash_cfg_addr(struct adapter *adapter); int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5b38e955af6..3f60070f251 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2929,16 +2929,26 @@ static int set_flash(struct net_device *netdev, struct ethtool_flash *ef) int ret; const struct firmware *fw; struct adapter *adap = netdev2adap(netdev); + unsigned int mbox = FW_PCIE_FW_MASTER_MASK + 1; ef->data[sizeof(ef->data) - 1] = '\0'; ret = request_firmware(&fw, ef->data, adap->pdev_dev); if (ret < 0) return ret; - ret = t4_load_fw(adap, fw->data, fw->size); + /* If the adapter has been fully initialized then we'll go ahead and + * try to get the firmware's cooperation in upgrading to the new + * firmware image otherwise we'll try to do the entire job from the + * host ... and we always "force" the operation in this path. + */ + if (adap->flags & FULL_INIT_DONE) + mbox = adap->mbox; + + ret = t4_fw_upgrade(adap, mbox, fw->data, fw->size, 1); release_firmware(fw); if (!ret) - dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data); + dev_info(adap->pdev_dev, "loaded firmware %s," + " reload cxgb4 driver\n", ef->data); return ret; } @@ -4359,6 +4369,7 @@ EXPORT_SYMBOL(cxgb4_unregister_uld); * success (true) if it belongs otherwise failure (false). * Called with rcu_read_lock() held. */ +#if IS_ENABLED(CONFIG_IPV6) static bool cxgb4_netdev(const struct net_device *netdev) { struct adapter *adap; @@ -4480,6 +4491,13 @@ static int update_root_dev_clip(struct net_device *dev) return ret; /* Parse all bond and vlan devices layered on top of the physical dev */ + root_dev = netdev_master_upper_dev_get_rcu(dev); + if (root_dev) { + ret = update_dev_clip(root_dev, dev); + if (ret) + return ret; + } + for (i = 0; i < VLAN_N_VID; i++) { root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i); if (!root_dev) @@ -4512,6 +4530,7 @@ static void update_clip(const struct adapter *adap) } rcu_read_unlock(); } +#endif /* IS_ENABLED(CONFIG_IPV6) */ /** * cxgb_up - enable the adapter @@ -4558,7 +4577,9 @@ static int cxgb_up(struct adapter *adap) t4_intr_enable(adap); adap->flags |= FULL_INIT_DONE; notify_ulds(adap, CXGB4_STATE_UP); +#if IS_ENABLED(CONFIG_IPV6) update_clip(adap); +#endif out: return err; irq_err: @@ -6852,14 +6873,18 @@ static int __init cxgb4_init_module(void) if (ret < 0) debugfs_remove(cxgb4_debugfs_root); +#if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&cxgb4_inet6addr_notifier); +#endif return ret; } static void __exit cxgb4_cleanup_module(void) { +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier); +#endif pci_unregister_driver(&cxgb4_driver); debugfs_remove(cxgb4_debugfs_root); /* NULL ok */ } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 1fff1495fe3..a9d9d74e4f0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -37,8 +37,6 @@ #include "t4_regs.h" #include "t4fw_api.h" -static int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, - const u8 *fw_data, unsigned int size, int force); /** * t4_wait_op_done_val - wait until an operation is completed * @adapter: the adapter performing the operation @@ -3076,8 +3074,8 @@ static int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset) * positive errno indicates that the adapter is ~probably~ intact, a * negative errno indicates that things are looking bad ... */ -static int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, - const u8 *fw_data, unsigned int size, int force) +int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, + const u8 *fw_data, unsigned int size, int force) { const struct fw_hdr *fw_hdr = (const struct fw_hdr *)fw_data; int reset, ret; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 1d5e1822bb2..9af296a1ca9 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -367,6 +367,56 @@ struct bufdesc_ex { #define FEC_VLAN_TAG_LEN 0x04 #define FEC_ETHTYPE_LEN 0x02 +/* Controller is ENET-MAC */ +#define FEC_QUIRK_ENET_MAC (1 << 0) +/* Controller needs driver to swap frame */ +#define FEC_QUIRK_SWAP_FRAME (1 << 1) +/* Controller uses gasket */ +#define FEC_QUIRK_USE_GASKET (1 << 2) +/* Controller has GBIT support */ +#define FEC_QUIRK_HAS_GBIT (1 << 3) +/* Controller has extend desc buffer */ +#define FEC_QUIRK_HAS_BUFDESC_EX (1 << 4) +/* Controller has hardware checksum support */ +#define FEC_QUIRK_HAS_CSUM (1 << 5) +/* Controller has hardware vlan support */ +#define FEC_QUIRK_HAS_VLAN (1 << 6) +/* ENET IP errata ERR006358 + * + * If the ready bit in the transmit buffer descriptor (TxBD[R]) is previously + * detected as not set during a prior frame transmission, then the + * ENET_TDAR[TDAR] bit is cleared at a later time, even if additional TxBDs + * were added to the ring and the ENET_TDAR[TDAR] bit is set. This results in + * frames not being transmitted until there is a 0-to-1 transition on + * ENET_TDAR[TDAR]. + */ +#define FEC_QUIRK_ERR006358 (1 << 7) +/* ENET IP hw AVB + * + * i.MX6SX ENET IP add Audio Video Bridging (AVB) feature support. + * - Two class indicators on receive with configurable priority + * - Two class indicators and line speed timer on transmit allowing + * implementation class credit based shapers externally + * - Additional DMA registers provisioned to allow managing up to 3 + * independent rings + */ +#define FEC_QUIRK_HAS_AVB (1 << 8) +/* There is a TDAR race condition for mutliQ when the software sets TDAR + * and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). + * This will cause the udma_tx and udma_tx_arbiter state machines to hang. + * The issue exist at i.MX6SX enet IP. + */ +#define FEC_QUIRK_ERR007885 (1 << 9) +/* ENET Block Guide/ Chapter for the iMX6SX (PELE) address one issue: + * After set ENET_ATCR[Capture], there need some time cycles before the counter + * value is capture in the register clock domain. + * The wait-time-cycles is at least 6 clock cycles of the slower clock between + * the register clock and the 1588 clock. The 1588 ts_clk is fixed to 25Mhz, + * register clock is 66Mhz, so the wait-time-cycles must be greater than 240ns + * (40ns * 6). + */ +#define FEC_QUIRK_BUG_CAPTURE (1 << 10) + struct fec_enet_priv_tx_q { int index; unsigned char *tx_bounce[TX_RING_SIZE]; @@ -484,12 +534,22 @@ struct fec_enet_private { unsigned int itr_clk_rate; u32 rx_copybreak; + + /* ptp clock period in ns*/ + unsigned int ptp_inc; + + /* pps */ + int pps_channel; + unsigned int reload_period; + int pps_enable; + unsigned int next_counter; }; void fec_ptp_init(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); +uint fec_ptp_check_pps_event(struct fec_enet_private *fep); /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 87975b5dda9..81b96cf8757 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -78,47 +78,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 -/* Controller is ENET-MAC */ -#define FEC_QUIRK_ENET_MAC (1 << 0) -/* Controller needs driver to swap frame */ -#define FEC_QUIRK_SWAP_FRAME (1 << 1) -/* Controller uses gasket */ -#define FEC_QUIRK_USE_GASKET (1 << 2) -/* Controller has GBIT support */ -#define FEC_QUIRK_HAS_GBIT (1 << 3) -/* Controller has extend desc buffer */ -#define FEC_QUIRK_HAS_BUFDESC_EX (1 << 4) -/* Controller has hardware checksum support */ -#define FEC_QUIRK_HAS_CSUM (1 << 5) -/* Controller has hardware vlan support */ -#define FEC_QUIRK_HAS_VLAN (1 << 6) -/* ENET IP errata ERR006358 - * - * If the ready bit in the transmit buffer descriptor (TxBD[R]) is previously - * detected as not set during a prior frame transmission, then the - * ENET_TDAR[TDAR] bit is cleared at a later time, even if additional TxBDs - * were added to the ring and the ENET_TDAR[TDAR] bit is set. This results in - * frames not being transmitted until there is a 0-to-1 transition on - * ENET_TDAR[TDAR]. - */ -#define FEC_QUIRK_ERR006358 (1 << 7) -/* ENET IP hw AVB - * - * i.MX6SX ENET IP add Audio Video Bridging (AVB) feature support. - * - Two class indicators on receive with configurable priority - * - Two class indicators and line speed timer on transmit allowing - * implementation class credit based shapers externally - * - Additional DMA registers provisioned to allow managing up to 3 - * independent rings - */ -#define FEC_QUIRK_HAS_AVB (1 << 8) -/* There is a TDAR race condition for mutliQ when the software sets TDAR - * and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). - * This will cause the udma_tx and udma_tx_arbiter state machines to hang. - * The issue exist at i.MX6SX enet IP. - */ -#define FEC_QUIRK_ERR007885 (1 << 9) - static struct platform_device_id fec_devtype[] = { { /* keep it for coldfire */ @@ -146,7 +105,7 @@ static struct platform_device_id fec_devtype[] = { .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | - FEC_QUIRK_ERR007885, + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE, }, { /* sentinel */ } @@ -1622,6 +1581,8 @@ fec_enet_interrupt(int irq, void *dev_id) complete(&fep->mdio_done); } + fec_ptp_check_pps_event(fep); + return ret; } @@ -2912,20 +2873,12 @@ static void fec_poll_controller(struct net_device *dev) #endif #define FEATURES_NEED_QUIESCE NETIF_F_RXCSUM - -static int fec_set_features(struct net_device *netdev, +static inline void fec_enet_set_netdev_features(struct net_device *netdev, netdev_features_t features) { struct fec_enet_private *fep = netdev_priv(netdev); netdev_features_t changed = features ^ netdev->features; - /* Quiesce the device if necessary */ - if (netif_running(netdev) && changed & FEATURES_NEED_QUIESCE) { - napi_disable(&fep->napi); - netif_tx_lock_bh(netdev); - fec_stop(netdev); - } - netdev->features = features; /* Receive checksum has been changed */ @@ -2935,13 +2888,25 @@ static int fec_set_features(struct net_device *netdev, else fep->csum_flags &= ~FLAG_RX_CSUM_ENABLED; } +} + +static int fec_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct fec_enet_private *fep = netdev_priv(netdev); + netdev_features_t changed = features ^ netdev->features; - /* Resume the device after updates */ if (netif_running(netdev) && changed & FEATURES_NEED_QUIESCE) { + napi_disable(&fep->napi); + netif_tx_lock_bh(netdev); + fec_stop(netdev); + fec_enet_set_netdev_features(netdev, features); fec_restart(netdev); netif_tx_wake_all_queues(netdev); netif_tx_unlock_bh(netdev); napi_enable(&fep->napi); + } else { + fec_enet_set_netdev_features(netdev, features); } return 0; diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index cca3617a232..992c8c3db55 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -61,6 +61,24 @@ #define FEC_T_INC_CORR_MASK 0x00007f00 #define FEC_T_INC_CORR_OFFSET 8 +#define FEC_T_CTRL_PINPER 0x00000080 +#define FEC_T_TF0_MASK 0x00000001 +#define FEC_T_TF0_OFFSET 0 +#define FEC_T_TF1_MASK 0x00000002 +#define FEC_T_TF1_OFFSET 1 +#define FEC_T_TF2_MASK 0x00000004 +#define FEC_T_TF2_OFFSET 2 +#define FEC_T_TF3_MASK 0x00000008 +#define FEC_T_TF3_OFFSET 3 +#define FEC_T_TDRE_MASK 0x00000001 +#define FEC_T_TDRE_OFFSET 0 +#define FEC_T_TMODE_MASK 0x0000003C +#define FEC_T_TMODE_OFFSET 2 +#define FEC_T_TIE_MASK 0x00000040 +#define FEC_T_TIE_OFFSET 6 +#define FEC_T_TF_MASK 0x00000080 +#define FEC_T_TF_OFFSET 7 + #define FEC_ATIME_CTRL 0x400 #define FEC_ATIME 0x404 #define FEC_ATIME_EVT_OFFSET 0x408 @@ -69,7 +87,143 @@ #define FEC_ATIME_INC 0x414 #define FEC_TS_TIMESTAMP 0x418 +#define FEC_TGSR 0x604 +#define FEC_TCSR(n) (0x608 + n * 0x08) +#define FEC_TCCR(n) (0x60C + n * 0x08) +#define MAX_TIMER_CHANNEL 3 +#define FEC_TMODE_TOGGLE 0x05 +#define FEC_HIGH_PULSE 0x0F + #define FEC_CC_MULT (1 << 31) +#define FEC_COUNTER_PERIOD (1 << 31) +#define PPS_OUPUT_RELOAD_PERIOD NSEC_PER_SEC +#define FEC_CHANNLE_0 0 +#define DEFAULT_PPS_CHANNEL FEC_CHANNLE_0 + +/** + * fec_ptp_enable_pps + * @fep: the fec_enet_private structure handle + * @enable: enable the channel pps output + * + * This function enble the PPS ouput on the timer channel. + */ +static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) +{ + unsigned long flags; + u32 val, tempval; + int inc; + struct timespec ts; + u64 ns; + u32 remainder; + val = 0; + + if (!(fep->hwts_tx_en || fep->hwts_rx_en)) { + dev_err(&fep->pdev->dev, "No ptp stack is running\n"); + return -EINVAL; + } + + if (fep->pps_enable == enable) + return 0; + + fep->pps_channel = DEFAULT_PPS_CHANNEL; + fep->reload_period = PPS_OUPUT_RELOAD_PERIOD; + inc = fep->ptp_inc; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + + if (enable) { + /* clear capture or output compare interrupt status if have. + */ + writel(FEC_T_TF_MASK, fep->hwp + FEC_TCSR(fep->pps_channel)); + + /* It is recommended to doulbe check the TMODE field in the + * TCSR register to be cleared before the first compare counter + * is written into TCCR register. Just add a double check. + */ + val = readl(fep->hwp + FEC_TCSR(fep->pps_channel)); + do { + val &= ~(FEC_T_TMODE_MASK); + writel(val, fep->hwp + FEC_TCSR(fep->pps_channel)); + val = readl(fep->hwp + FEC_TCSR(fep->pps_channel)); + } while (val & FEC_T_TMODE_MASK); + + /* Dummy read counter to update the counter */ + timecounter_read(&fep->tc); + /* We want to find the first compare event in the next + * second point. So we need to know what the ptp time + * is now and how many nanoseconds is ahead to get next second. + * The remaining nanosecond ahead before the next second would be + * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds + * to current timer would be next second. + */ + tempval = readl(fep->hwp + FEC_ATIME_CTRL); + tempval |= FEC_T_CTRL_CAPTURE; + writel(tempval, fep->hwp + FEC_ATIME_CTRL); + + tempval = readl(fep->hwp + FEC_ATIME); + /* Convert the ptp local counter to 1588 timestamp */ + ns = timecounter_cyc2time(&fep->tc, tempval); + ts.tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder); + ts.tv_nsec = remainder; + + /* The tempval is less than 3 seconds, and so val is less than + * 4 seconds. No overflow for 32bit calculation. + */ + val = NSEC_PER_SEC - (u32)ts.tv_nsec + tempval; + + /* Need to consider the situation that the current time is + * very close to the second point, which means NSEC_PER_SEC + * - ts.tv_nsec is close to be zero(For example 20ns); Since the timer + * is still running when we calculate the first compare event, it is + * possible that the remaining nanoseonds run out before the compare + * counter is calculated and written into TCCR register. To avoid + * this possibility, we will set the compare event to be the next + * of next second. The current setting is 31-bit timer and wrap + * around over 2 seconds. So it is okay to set the next of next + * seond for the timer. + */ + val += NSEC_PER_SEC; + + /* We add (2 * NSEC_PER_SEC - (u32)ts.tv_nsec) to current + * ptp counter, which maybe cause 32-bit wrap. Since the + * (NSEC_PER_SEC - (u32)ts.tv_nsec) is less than 2 second. + * We can ensure the wrap will not cause issue. If the offset + * is bigger than fep->cc.mask would be a error. + */ + val &= fep->cc.mask; + writel(val, fep->hwp + FEC_TCCR(fep->pps_channel)); + + /* Calculate the second the compare event timestamp */ + fep->next_counter = (val + fep->reload_period) & fep->cc.mask; + + /* * Enable compare event when overflow */ + val = readl(fep->hwp + FEC_ATIME_CTRL); + val |= FEC_T_CTRL_PINPER; + writel(val, fep->hwp + FEC_ATIME_CTRL); + + /* Compare channel setting. */ + val = readl(fep->hwp + FEC_TCSR(fep->pps_channel)); + val |= (1 << FEC_T_TF_OFFSET | 1 << FEC_T_TIE_OFFSET); + val &= ~(1 << FEC_T_TDRE_OFFSET); + val &= ~(FEC_T_TMODE_MASK); + val |= (FEC_HIGH_PULSE << FEC_T_TMODE_OFFSET); + writel(val, fep->hwp + FEC_TCSR(fep->pps_channel)); + + /* Write the second compare event timestamp and calculate + * the third timestamp. Refer the TCCR register detail in the spec. + */ + writel(fep->next_counter, fep->hwp + FEC_TCCR(fep->pps_channel)); + fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask; + } else { + writel(0, fep->hwp + FEC_TCSR(fep->pps_channel)); + } + + fep->pps_enable = enable; + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + + return 0; +} + /** * fec_ptp_read - read raw cycle counter (to be used by time counter) * @cc: the cyclecounter structure @@ -82,12 +236,17 @@ static cycle_t fec_ptp_read(const struct cyclecounter *cc) { struct fec_enet_private *fep = container_of(cc, struct fec_enet_private, cc); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); u32 tempval; tempval = readl(fep->hwp + FEC_ATIME_CTRL); tempval |= FEC_T_CTRL_CAPTURE; writel(tempval, fep->hwp + FEC_ATIME_CTRL); + if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE) + udelay(1); + return readl(fep->hwp + FEC_ATIME); } @@ -113,14 +272,15 @@ void fec_ptp_start_cyclecounter(struct net_device *ndev) /* 1ns counter */ writel(inc << FEC_T_INC_OFFSET, fep->hwp + FEC_ATIME_INC); - /* use free running count */ - writel(0, fep->hwp + FEC_ATIME_EVT_PERIOD); + /* use 31-bit timer counter */ + writel(FEC_COUNTER_PERIOD, fep->hwp + FEC_ATIME_EVT_PERIOD); - writel(FEC_T_CTRL_ENABLE, fep->hwp + FEC_ATIME_CTRL); + writel(FEC_T_CTRL_ENABLE | FEC_T_CTRL_PERIOD_RST, + fep->hwp + FEC_ATIME_CTRL); memset(&fep->cc, 0, sizeof(fep->cc)); fep->cc.read = fec_ptp_read; - fep->cc.mask = CLOCKSOURCE_MASK(32); + fep->cc.mask = CLOCKSOURCE_MASK(31); fep->cc.shift = 31; fep->cc.mult = FEC_CC_MULT; @@ -143,32 +303,59 @@ void fec_ptp_start_cyclecounter(struct net_device *ndev) */ static int fec_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { - u64 diff; unsigned long flags; int neg_adj = 0; - u32 mult = FEC_CC_MULT; + u32 i, tmp; + u32 corr_inc, corr_period; + u32 corr_ns; + u64 lhs, rhs; struct fec_enet_private *fep = container_of(ptp, struct fec_enet_private, ptp_caps); + if (ppb == 0) + return 0; + if (ppb < 0) { ppb = -ppb; neg_adj = 1; } - diff = mult; - diff *= ppb; - diff = div_u64(diff, 1000000000ULL); + /* In theory, corr_inc/corr_period = ppb/NSEC_PER_SEC; + * Try to find the corr_inc between 1 to fep->ptp_inc to + * meet adjustment requirement. + */ + lhs = NSEC_PER_SEC; + rhs = (u64)ppb * (u64)fep->ptp_inc; + for (i = 1; i <= fep->ptp_inc; i++) { + if (lhs >= rhs) { + corr_inc = i; + corr_period = div_u64(lhs, rhs); + break; + } + lhs += NSEC_PER_SEC; + } + /* Not found? Set it to high value - double speed + * correct in every clock step. + */ + if (i > fep->ptp_inc) { + corr_inc = fep->ptp_inc; + corr_period = 1; + } + + if (neg_adj) + corr_ns = fep->ptp_inc - corr_inc; + else + corr_ns = fep->ptp_inc + corr_inc; spin_lock_irqsave(&fep->tmreg_lock, flags); - /* - * dummy read to set cycle_last in tc to now. - * So use adjusted mult to calculate when next call - * timercounter_read. - */ - timecounter_read(&fep->tc); - fep->cc.mult = neg_adj ? mult - diff : mult + diff; + tmp = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK; + tmp |= corr_ns << FEC_T_INC_CORR_OFFSET; + writel(tmp, fep->hwp + FEC_ATIME_INC); + writel(corr_period, fep->hwp + FEC_ATIME_CORR); + /* dummy read to update the timer. */ + timecounter_read(&fep->tc); spin_unlock_irqrestore(&fep->tmreg_lock, flags); @@ -188,12 +375,19 @@ static int fec_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) container_of(ptp, struct fec_enet_private, ptp_caps); unsigned long flags; u64 now; + u32 counter; spin_lock_irqsave(&fep->tmreg_lock, flags); now = timecounter_read(&fep->tc); now += delta; + /* Get the timer value based on adjusted timestamp. + * Update the counter with the masked value. + */ + counter = now & fep->cc.mask; + writel(counter, fep->hwp + FEC_ATIME); + /* reset the timecounter */ timecounter_init(&fep->tc, &fep->cc, now); @@ -244,6 +438,7 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp, u64 ns; unsigned long flags; + u32 counter; mutex_lock(&fep->ptp_clk_mutex); /* Check the ptp clock */ @@ -254,8 +449,13 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp, ns = ts->tv_sec * 1000000000ULL; ns += ts->tv_nsec; + /* Get the timer value based on timestamp. + * Update the counter with the masked value. + */ + counter = ns & fep->cc.mask; spin_lock_irqsave(&fep->tmreg_lock, flags); + writel(counter, fep->hwp + FEC_ATIME); timecounter_init(&fep->tc, &fep->cc, ns); spin_unlock_irqrestore(&fep->tmreg_lock, flags); mutex_unlock(&fep->ptp_clk_mutex); @@ -272,6 +472,15 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp, static int fec_ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { + struct fec_enet_private *fep = + container_of(ptp, struct fec_enet_private, ptp_caps); + int ret = 0; + + if (rq->type == PTP_CLK_REQ_PPS) { + ret = fec_ptp_enable_pps(fep, on); + + return ret; + } return -EOPNOTSUPP; } @@ -386,7 +595,7 @@ void fec_ptp_init(struct platform_device *pdev) fep->ptp_caps.n_ext_ts = 0; fep->ptp_caps.n_per_out = 0; fep->ptp_caps.n_pins = 0; - fep->ptp_caps.pps = 0; + fep->ptp_caps.pps = 1; fep->ptp_caps.adjfreq = fec_ptp_adjfreq; fep->ptp_caps.adjtime = fec_ptp_adjtime; fep->ptp_caps.gettime = fec_ptp_gettime; @@ -394,6 +603,7 @@ void fec_ptp_init(struct platform_device *pdev) fep->ptp_caps.enable = fec_ptp_enable; fep->cycle_speed = clk_get_rate(fep->clk_ptp); + fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed; spin_lock_init(&fep->tmreg_lock); @@ -409,3 +619,36 @@ void fec_ptp_init(struct platform_device *pdev) schedule_delayed_work(&fep->time_keep, HZ); } + +/** + * fec_ptp_check_pps_event + * @fep: the fec_enet_private structure handle + * + * This function check the pps event and reload the timer compare counter. + */ +uint fec_ptp_check_pps_event(struct fec_enet_private *fep) +{ + u32 val; + u8 channel = fep->pps_channel; + struct ptp_clock_event event; + + val = readl(fep->hwp + FEC_TCSR(channel)); + if (val & FEC_T_TF_MASK) { + /* Write the next next compare(not the next according the spec) + * value to the register + */ + writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); + do { + writel(val, fep->hwp + FEC_TCSR(channel)); + } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); + + /* Update the counter; */ + fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask; + + event.type = PTP_CLOCK_PPS; + ptp_clock_event(fep->ptp_clock, &event); + return 1; + } + + return 0; +} diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 379b1a578d3..4fdf0aa1697 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -338,7 +338,7 @@ static void gfar_init_tx_rx_base(struct gfar_private *priv) static void gfar_rx_buff_size_config(struct gfar_private *priv) { - int frame_size = priv->ndev->mtu + ETH_HLEN; + int frame_size = priv->ndev->mtu + ETH_HLEN + ETH_FCS_LEN; /* set this when rx hw offload (TOE) functions are being used */ priv->uses_rxfcb = 0; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 6919adb66f5..5b8300a32bf 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -320,4 +320,15 @@ config FM10K To compile this driver as a module, choose M here. The module will be called fm10k. MSI-X interrupt support is required +config FM10K_VXLAN + bool "Virtual eXtensible Local Area Network Support" + default n + depends on FM10K && VXLAN && !(FM10K=y && VXLAN=m) + ---help--- + This allows one to create VXLAN virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. VXLAN is often used + to tunnel virtual network infrastructure in virtualized environments. + Say Y here if you want to use Virtual eXtensible Local Area Network + (VXLAN) in the driver. + endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 9d7118a0d67..e645af412e7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -929,6 +929,30 @@ static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring, return i == tx_ring->count; } +static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) +{ + netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. */ + if (likely(fm10k_desc_unused(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! - use start_queue because it doesn't call schedule */ + netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); + ++tx_ring->tx_stats.restart_queue; + return 0; +} + +static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) +{ + if (likely(fm10k_desc_unused(tx_ring) >= size)) + return 0; + return __fm10k_maybe_stop_tx(tx_ring, size); +} + static void fm10k_tx_map(struct fm10k_ring *tx_ring, struct fm10k_tx_buffer *first) { @@ -1022,13 +1046,18 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring, tx_ring->next_to_use = i; + /* Make sure there is space in the ring for the next send. */ + fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED); + /* notify HW of packet */ - writel(i, tx_ring->tail); + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } return; dma_error: @@ -1048,30 +1077,6 @@ dma_error: tx_ring->next_to_use = i; } -static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) -{ - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); - - smp_mb(); - - /* We need to check again in a case another CPU has just - * made room available. */ - if (likely(fm10k_desc_unused(tx_ring) < size)) - return -EBUSY; - - /* A reprieve! - use start_queue because it doesn't call schedule */ - netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; - return 0; -} - -static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) -{ - if (likely(fm10k_desc_unused(tx_ring) >= size)) - return 0; - return __fm10k_maybe_stop_tx(tx_ring, size); -} - netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, struct fm10k_ring *tx_ring) { @@ -1116,8 +1121,6 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, fm10k_tx_map(tx_ring, first); - fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED); - return NETDEV_TX_OK; out_drop: diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index bf44a8fe711..8811364b91c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -20,9 +20,9 @@ #include "fm10k.h" #include <linux/vmalloc.h> -#if IS_ENABLED(CONFIG_VXLAN) +#if IS_ENABLED(CONFIG_FM10K_VXLAN) #include <net/vxlan.h> -#endif /* CONFIG_VXLAN */ +#endif /* CONFIG_FM10K_VXLAN */ /** * fm10k_setup_tx_resources - allocate Tx resources (Descriptors) @@ -556,7 +556,7 @@ int fm10k_open(struct net_device *netdev) if (err) goto err_set_queues; -#if IS_ENABLED(CONFIG_VXLAN) +#if IS_ENABLED(CONFIG_FM10K_VXLAN) /* update VXLAN port configuration */ vxlan_get_rx_port(netdev); @@ -785,14 +785,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) if (!(netdev->flags & IFF_PROMISC)) { err = hw->mac.ops.update_vlan(hw, vid, 0, set); if (err) - return err; + goto err_out; } /* update our base MAC address */ err = hw->mac.ops.update_uc_addr(hw, interface->glort, hw->mac.addr, vid, set, 0); if (err) - return err; + goto err_out; /* set vid prior to syncing/unsyncing the VLAN */ interface->vid = vid + (set ? VLAN_N_VID : 0); @@ -801,9 +801,10 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) __dev_uc_unsync(netdev, fm10k_uc_vlan_unsync); __dev_mc_unsync(netdev, fm10k_mc_vlan_unsync); +err_out: fm10k_mbx_unlock(interface); - return 0; + return err; } static int fm10k_vlan_rx_add_vid(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index e02036c427b..a0cb74ab3dc 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1489,6 +1489,7 @@ void fm10k_up(struct fm10k_intfc *interface) netif_tx_start_all_queues(interface->netdev); /* kick off the service timer */ + hw->mac.get_host_state = 1; mod_timer(&interface->service_timer, jiffies); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 706fc69aa0c..97c85b85953 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1261,6 +1261,9 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) struct ixgbe_hw *hw = &adapter->hw; u32 regval; + if (vf >= adapter->num_vfs) + return -EINVAL; + adapter->vfinfo[vf].spoofchk_enabled = setting; regval = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index ffbae293cef..6e6f18fc5d7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -11,7 +11,7 @@ static const struct qlcnic_mailbox_metadata qlcnic_mbx_tbl[] = { {QLCNIC_CMD_CREATE_RX_CTX, 4, 1}, {QLCNIC_CMD_DESTROY_RX_CTX, 2, 1}, {QLCNIC_CMD_CREATE_TX_CTX, 4, 1}, - {QLCNIC_CMD_DESTROY_TX_CTX, 2, 1}, + {QLCNIC_CMD_DESTROY_TX_CTX, 3, 1}, {QLCNIC_CMD_INTRPT_TEST, 4, 1}, {QLCNIC_CMD_SET_MTU, 4, 1}, {QLCNIC_CMD_READ_PHY, 4, 2}, @@ -32,7 +32,7 @@ static const struct qlcnic_mailbox_metadata qlcnic_mbx_tbl[] = { {QLCNIC_CMD_CONFIGURE_ESWITCH, 4, 1}, {QLCNIC_CMD_GET_MAC_STATS, 4, 1}, {QLCNIC_CMD_GET_ESWITCH_PORT_CONFIG, 4, 3}, - {QLCNIC_CMD_GET_ESWITCH_STATS, 5, 1}, + {QLCNIC_CMD_GET_ESWITCH_STATS, 4, 1}, {QLCNIC_CMD_CONFIG_PORT, 4, 1}, {QLCNIC_CMD_TEMP_SIZE, 4, 4}, {QLCNIC_CMD_GET_TEMP_HDR, 4, 1}, @@ -129,7 +129,7 @@ int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter, } QLCWR32(adapter, QLCNIC_SIGN_CRB_OFFSET, signature); - for (i = 1; i < QLCNIC_CDRP_MAX_ARGS; i++) + for (i = 1; i < cmd->req.num; i++) QLCWR32(adapter, QLCNIC_CDRP_ARG(i), cmd->req.arg[i]); QLCWR32(adapter, QLCNIC_CDRP_CRB_OFFSET, QLCNIC_CDRP_FORM_CMD(cmd->req.arg[0])); diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 60f85149fc4..f77cce034ad 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -71,9 +71,17 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index; } -/* Report whether the NIC considers this TX queue empty, given the - * write_count used for the last doorbell push. May return false - * negative. +/* Get partner of a TX queue, seen as part of the same net core queue */ +static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + return tx_queue - EFX_TXQ_TYPE_OFFLOAD; + else + return tx_queue + EFX_TXQ_TYPE_OFFLOAD; +} + +/* Report whether this TX queue would be empty for the given write_count. + * May return false negative. */ static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count) @@ -86,9 +94,18 @@ static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; } -static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue) +/* Decide whether we can use TX PIO, ie. write packet data directly into + * a buffer on the device. This can reduce latency at the expense of + * throughput, so we only do this if both hardware and software TX rings + * are empty. This also ensures that only one packet at a time can be + * using the PIO buffer. + */ +static inline bool efx_nic_may_tx_pio(struct efx_tx_queue *tx_queue) { - return __efx_nic_tx_is_empty(tx_queue, tx_queue->write_count); + struct efx_tx_queue *partner = efx_tx_queue_partner(tx_queue); + return tx_queue->piobuf && + __efx_nic_tx_is_empty(tx_queue, tx_queue->insert_count) && + __efx_nic_tx_is_empty(partner, partner->insert_count); } /* Decide whether to push a TX descriptor to the NIC vs merely writing @@ -96,6 +113,8 @@ static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue) * descriptor to an empty queue, but is otherwise pointless. Further, * Falcon and Siena have hardware bugs (SF bug 33851) that may be * triggered if we don't check this. + * We use the write_count used for the last doorbell push, to get the + * NIC's view of the tx queue. */ static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 32060984221..ee84a90e371 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -132,15 +132,6 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) return max_descs; } -/* Get partner of a TX queue, seen as part of the same net core queue */ -static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) -{ - if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) - return tx_queue - EFX_TXQ_TYPE_OFFLOAD; - else - return tx_queue + EFX_TXQ_TYPE_OFFLOAD; -} - static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { /* We need to consider both queues that the net core sees as one */ @@ -344,6 +335,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct efx_nic *efx = tx_queue->efx; struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; + unsigned int old_insert_count = tx_queue->insert_count; skb_frag_t *fragment; unsigned int len, unmap_len = 0; dma_addr_t dma_addr, unmap_addr = 0; @@ -351,7 +343,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) unsigned short dma_flags; int i = 0; - EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); + EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); if (skb_shinfo(skb)->gso_size) return efx_enqueue_skb_tso(tx_queue, skb); @@ -369,9 +361,8 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Consider using PIO for short packets */ #ifdef EFX_USE_PIO - if (skb->len <= efx_piobuf_size && tx_queue->piobuf && - efx_nic_tx_is_empty(tx_queue) && - efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) { + if (skb->len <= efx_piobuf_size && !skb->xmit_more && + efx_nic_may_tx_pio(tx_queue)) { buffer = efx_enqueue_skb_pio(tx_queue, skb); dma_flags = EFX_TX_BUF_OPTION; goto finish_packet; @@ -439,13 +430,14 @@ finish_packet: netdev_tx_sent_queue(tx_queue->core_txq, skb->len); + efx_tx_maybe_stop_queue(tx_queue); + /* Pass off to hardware */ - efx_nic_push_buffers(tx_queue); + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + efx_nic_push_buffers(tx_queue); tx_queue->tx_packets++; - efx_tx_maybe_stop_queue(tx_queue); - return NETDEV_TX_OK; dma_err: @@ -458,7 +450,7 @@ finish_packet: dev_kfree_skb_any(skb); /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != tx_queue->write_count) { + while (tx_queue->insert_count != old_insert_count) { unsigned int pkts_compl = 0, bytes_compl = 0; --tx_queue->insert_count; buffer = __efx_tx_queue_get_insert_buffer(tx_queue); @@ -989,12 +981,13 @@ static int efx_tso_put_header(struct efx_tx_queue *tx_queue, /* Remove buffers put into a tx_queue. None of the buffers must have * an skb attached. */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) +static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count) { struct efx_tx_buffer *buffer; /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != tx_queue->write_count) { + while (tx_queue->insert_count != insert_count) { --tx_queue->insert_count; buffer = __efx_tx_queue_get_insert_buffer(tx_queue); efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); @@ -1258,13 +1251,14 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; + unsigned int old_insert_count = tx_queue->insert_count; int frag_i, rc; struct tso_state state; /* Find the packet protocol and sanity-check it */ state.protocol = efx_tso_check_protocol(skb); - EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); + EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); rc = tso_start(&state, efx, skb); if (rc) @@ -1308,11 +1302,12 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, netdev_tx_sent_queue(tx_queue->core_txq, skb->len); - /* Pass off to hardware */ - efx_nic_push_buffers(tx_queue); - efx_tx_maybe_stop_queue(tx_queue); + /* Pass off to hardware */ + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + efx_nic_push_buffers(tx_queue); + tx_queue->tso_bursts++; return NETDEV_TX_OK; @@ -1336,6 +1331,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr, state.header_unmap_len, DMA_TO_DEVICE); - efx_enqueue_unwind(tx_queue); + efx_enqueue_unwind(tx_queue, old_insert_count); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 552bbc17863..ccfe7e51041 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -3,7 +3,7 @@ * * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited * Author: Srinivas Kandagatla <srinivas.kandagatla@st.com> - * + * Contributors: Giuseppe Cavallaro <peppe.cavallaro@st.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,45 +22,22 @@ #include <linux/of.h> #include <linux/of_net.h> +#define DWMAC_125MHZ 125000000 +#define DWMAC_50MHZ 50000000 +#define DWMAC_25MHZ 25000000 +#define DWMAC_2_5MHZ 2500000 + +#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \ + iface == PHY_INTERFACE_MODE_RGMII_ID || \ + iface == PHY_INTERFACE_MODE_RGMII_RXID || \ + iface == PHY_INTERFACE_MODE_RGMII_TXID) + +#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \ + iface == PHY_INTERFACE_MODE_GMII) + +/* STiH4xx register definitions (STiH415/STiH416/STiH407/STiH410 families) */ + /** - * STi GMAC glue logic. - * -------------------- - * - * _ - * | \ - * --------|0 \ ETH_SEL_INTERNAL_NOTEXT_PHYCLK - * phyclk | |___________________________________________ - * | | | (phyclk-in) - * --------|1 / | - * int-clk |_ / | - * | _ - * | | \ - * |_______|1 \ ETH_SEL_TX_RETIME_CLK - * | |___________________________ - * | | (tx-retime-clk) - * _______|0 / - * | |_ / - * _ | - * | \ | - * --------|0 \ | - * clk_125 | |__| - * | | ETH_SEL_TXCLK_NOT_CLK125 - * --------|1 / - * txclk |_ / - * - * - * ETH_SEL_INTERNAL_NOTEXT_PHYCLK is valid only for RMII where PHY can - * generate 50MHz clock or MAC can generate it. - * This bit is configured by "st,ext-phyclk" property. - * - * ETH_SEL_TXCLK_NOT_CLK125 is only valid for gigabit modes, where the 125Mhz - * clock either comes from clk-125 pin or txclk pin. This configuration is - * totally driven by the board wiring. This bit is configured by - * "st,tx-retime-src" property. - * - * TXCLK configuration is different for different phy interface modes - * and changes according to link speed in modes like RGMII. - * * Below table summarizes the clock requirement and clock sources for * supported phy interface modes with link speeds. * ________________________________________________ @@ -74,44 +51,58 @@ * ------------------------------------------------ *| RGMII | 125Mhz | 25Mhz | *| | clk-125/txclk | clkgen | + *| | clkgen | | * ------------------------------------------------ *| RMII | n/a | 25Mhz | *| | |clkgen/phyclk-in | * ------------------------------------------------ * - * TX lines are always retimed with a clk, which can vary depending - * on the board configuration. Below is the table of these bits - * in eth configuration register depending on source of retime clk. - * - *--------------------------------------------------------------- - * src | tx_rt_clk | int_not_ext_phyclk | txclk_n_clk125| - *--------------------------------------------------------------- - * txclk | 0 | n/a | 1 | - *--------------------------------------------------------------- - * ck_125| 0 | n/a | 0 | - *--------------------------------------------------------------- - * phyclk| 1 | 0 | n/a | - *--------------------------------------------------------------- - * clkgen| 1 | 1 | n/a | - *--------------------------------------------------------------- + * Register Configuration + *------------------------------- + * src |BIT(8)| BIT(7)| BIT(6)| + *------------------------------- + * txclk | 0 | n/a | 1 | + *------------------------------- + * ck_125| 0 | n/a | 0 | + *------------------------------- + * phyclk| 1 | 0 | n/a | + *------------------------------- + * clkgen| 1 | 1 | n/a | + *------------------------------- */ - /* Register definition */ +#define STIH4XX_RETIME_SRC_MASK GENMASK(8, 6) +#define STIH4XX_ETH_SEL_TX_RETIME_CLK BIT(8) +#define STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7) +#define STIH4XX_ETH_SEL_TXCLK_NOT_CLK125 BIT(6) + +/* STiD127 register definitions */ - /* 3 bits [8:6] - * [6:6] ETH_SEL_TXCLK_NOT_CLK125 - * [7:7] ETH_SEL_INTERNAL_NOTEXT_PHYCLK - * [8:8] ETH_SEL_TX_RETIME_CLK - * - */ +/** + *----------------------- + * src |BIT(6)| BIT(7)| + *----------------------- + * MII | 1 | n/a | + *----------------------- + * RMII | n/a | 1 | + * clkgen| | | + *----------------------- + * RMII | n/a | 0 | + * phyclk| | | + *----------------------- + * RGMII | 1 | n/a | + * clkgen| | | + *----------------------- + */ -#define TX_RETIME_SRC_MASK GENMASK(8, 6) -#define ETH_SEL_TX_RETIME_CLK BIT(8) -#define ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7) -#define ETH_SEL_TXCLK_NOT_CLK125 BIT(6) +#define STID127_RETIME_SRC_MASK GENMASK(7, 6) +#define STID127_ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7) +#define STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK BIT(6) -#define ENMII_MASK GENMASK(5, 5) -#define ENMII BIT(5) +#define ENMII_MASK GENMASK(5, 5) +#define ENMII BIT(5) +#define EN_MASK GENMASK(1, 1) +#define EN BIT(1) /** * 3 bits [4:2] @@ -120,29 +111,23 @@ * 010-SGMII * 100-RMII */ -#define MII_PHY_SEL_MASK GENMASK(4, 2) -#define ETH_PHY_SEL_RMII BIT(4) -#define ETH_PHY_SEL_SGMII BIT(3) -#define ETH_PHY_SEL_RGMII BIT(2) -#define ETH_PHY_SEL_GMII 0x0 -#define ETH_PHY_SEL_MII 0x0 - -#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \ - iface == PHY_INTERFACE_MODE_RGMII_ID || \ - iface == PHY_INTERFACE_MODE_RGMII_RXID || \ - iface == PHY_INTERFACE_MODE_RGMII_TXID) - -#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \ - iface == PHY_INTERFACE_MODE_GMII) +#define MII_PHY_SEL_MASK GENMASK(4, 2) +#define ETH_PHY_SEL_RMII BIT(4) +#define ETH_PHY_SEL_SGMII BIT(3) +#define ETH_PHY_SEL_RGMII BIT(2) +#define ETH_PHY_SEL_GMII 0x0 +#define ETH_PHY_SEL_MII 0x0 struct sti_dwmac { - int interface; - bool ext_phyclk; - bool is_tx_retime_src_clk_125; - struct clk *clk; - int reg; + int interface; /* MII interface */ + bool ext_phyclk; /* Clock from external PHY */ + u32 tx_retime_src; /* TXCLK Retiming*/ + struct clk *clk; /* PHY clock */ + int ctrl_reg; /* GMAC glue-logic control register */ + int clk_sel_reg; /* GMAC ext clk selection register */ struct device *dev; struct regmap *regmap; + u32 speed; }; static u32 phy_intf_sels[] = { @@ -162,74 +147,133 @@ enum { TX_RETIME_SRC_CLKGEN, }; -static const char *const tx_retime_srcs[] = { - [TX_RETIME_SRC_NA] = "", - [TX_RETIME_SRC_TXCLK] = "txclk", - [TX_RETIME_SRC_CLK_125] = "clk_125", - [TX_RETIME_SRC_PHYCLK] = "phyclk", - [TX_RETIME_SRC_CLKGEN] = "clkgen", -}; - -static u32 tx_retime_val[] = { - [TX_RETIME_SRC_TXCLK] = ETH_SEL_TXCLK_NOT_CLK125, +static u32 stih4xx_tx_retime_val[] = { + [TX_RETIME_SRC_TXCLK] = STIH4XX_ETH_SEL_TXCLK_NOT_CLK125, [TX_RETIME_SRC_CLK_125] = 0x0, - [TX_RETIME_SRC_PHYCLK] = ETH_SEL_TX_RETIME_CLK, - [TX_RETIME_SRC_CLKGEN] = ETH_SEL_TX_RETIME_CLK | - ETH_SEL_INTERNAL_NOTEXT_PHYCLK, + [TX_RETIME_SRC_PHYCLK] = STIH4XX_ETH_SEL_TX_RETIME_CLK, + [TX_RETIME_SRC_CLKGEN] = STIH4XX_ETH_SEL_TX_RETIME_CLK + | STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK, }; -static void setup_retime_src(struct sti_dwmac *dwmac, u32 spd) +static void stih4xx_fix_retime_src(void *priv, u32 spd) { - u32 src = 0, freq = 0; - - if (spd == SPEED_100) { - if (dwmac->interface == PHY_INTERFACE_MODE_MII || - dwmac->interface == PHY_INTERFACE_MODE_GMII) { - src = TX_RETIME_SRC_TXCLK; - } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) { - if (dwmac->ext_phyclk) { - src = TX_RETIME_SRC_PHYCLK; - } else { - src = TX_RETIME_SRC_CLKGEN; - freq = 50000000; - } - - } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { + struct sti_dwmac *dwmac = priv; + u32 src = dwmac->tx_retime_src; + u32 reg = dwmac->ctrl_reg; + u32 freq = 0; + + if (dwmac->interface == PHY_INTERFACE_MODE_MII) { + src = TX_RETIME_SRC_TXCLK; + } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) { + if (dwmac->ext_phyclk) { + src = TX_RETIME_SRC_PHYCLK; + } else { src = TX_RETIME_SRC_CLKGEN; - freq = 25000000; + freq = DWMAC_50MHZ; } + } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { + /* On GiGa clk source can be either ext or from clkgen */ + if (spd == SPEED_1000) { + freq = DWMAC_125MHZ; + } else { + /* Switch to clkgen for these speeds */ + src = TX_RETIME_SRC_CLKGEN; + if (spd == SPEED_100) + freq = DWMAC_25MHZ; + else if (spd == SPEED_10) + freq = DWMAC_2_5MHZ; + } + } - if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk) - clk_set_rate(dwmac->clk, freq); + if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk && freq) + clk_set_rate(dwmac->clk, freq); - } else if (spd == SPEED_1000) { - if (dwmac->is_tx_retime_src_clk_125) - src = TX_RETIME_SRC_CLK_125; - else - src = TX_RETIME_SRC_TXCLK; + regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK, + stih4xx_tx_retime_val[src]); +} + +static void stid127_fix_retime_src(void *priv, u32 spd) +{ + struct sti_dwmac *dwmac = priv; + u32 reg = dwmac->ctrl_reg; + u32 freq = 0; + u32 val = 0; + + if (dwmac->interface == PHY_INTERFACE_MODE_MII) { + val = STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK; + } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) { + if (!dwmac->ext_phyclk) { + val = STID127_ETH_SEL_INTERNAL_NOTEXT_PHYCLK; + freq = DWMAC_50MHZ; + } + } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { + val = STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK; + if (spd == SPEED_1000) + freq = DWMAC_125MHZ; + else if (spd == SPEED_100) + freq = DWMAC_25MHZ; + else if (spd == SPEED_10) + freq = DWMAC_2_5MHZ; } - regmap_update_bits(dwmac->regmap, dwmac->reg, - TX_RETIME_SRC_MASK, tx_retime_val[src]); + if (dwmac->clk && freq) + clk_set_rate(dwmac->clk, freq); + + regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val); } -static void sti_dwmac_exit(struct platform_device *pdev, void *priv) +static void sti_dwmac_ctrl_init(struct sti_dwmac *dwmac) { - struct sti_dwmac *dwmac = priv; + struct regmap *regmap = dwmac->regmap; + int iface = dwmac->interface; + struct device *dev = dwmac->dev; + struct device_node *np = dev->of_node; + u32 reg = dwmac->ctrl_reg; + u32 val; if (dwmac->clk) - clk_disable_unprepare(dwmac->clk); + clk_prepare_enable(dwmac->clk); + + if (of_property_read_bool(np, "st,gmac_en")) + regmap_update_bits(regmap, reg, EN_MASK, EN); + + regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); + + val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; + regmap_update_bits(regmap, reg, ENMII_MASK, val); +} + +static int stix4xx_init(struct platform_device *pdev, void *priv) +{ + struct sti_dwmac *dwmac = priv; + u32 spd = dwmac->speed; + + sti_dwmac_ctrl_init(dwmac); + + stih4xx_fix_retime_src(priv, spd); + + return 0; } -static void sti_fix_mac_speed(void *priv, unsigned int spd) +static int stid127_init(struct platform_device *pdev, void *priv) { struct sti_dwmac *dwmac = priv; + u32 spd = dwmac->speed; - setup_retime_src(dwmac, spd); + sti_dwmac_ctrl_init(dwmac); - return; + stid127_fix_retime_src(priv, spd); + + return 0; } +static void sti_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct sti_dwmac *dwmac = priv; + + if (dwmac->clk) + clk_disable_unprepare(dwmac->clk); +} static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, struct platform_device *pdev) { @@ -245,6 +289,13 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-ethconf"); if (!res) return -ENODATA; + dwmac->ctrl_reg = res->start; + + /* clk selection from extra syscfg register */ + dwmac->clk_sel_reg = -ENXIO; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-clkconf"); + if (res) + dwmac->clk_sel_reg = res->start; regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon"); if (IS_ERR(regmap)) @@ -253,53 +304,31 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, dwmac->dev = dev; dwmac->interface = of_get_phy_mode(np); dwmac->regmap = regmap; - dwmac->reg = res->start; dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk"); - dwmac->is_tx_retime_src_clk_125 = false; + dwmac->tx_retime_src = TX_RETIME_SRC_NA; + dwmac->speed = SPEED_100; if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) { const char *rs; + dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; err = of_property_read_string(np, "st,tx-retime-src", &rs); - if (err < 0) { - dev_err(dev, "st,tx-retime-src not specified\n"); - return err; - } + if (err < 0) + dev_warn(dev, "Use internal clock source\n"); if (!strcasecmp(rs, "clk_125")) - dwmac->is_tx_retime_src_clk_125 = true; + dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; + else if (!strcasecmp(rs, "txclk")) + dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; + + dwmac->speed = SPEED_1000; } dwmac->clk = devm_clk_get(dev, "sti-ethclk"); - - if (IS_ERR(dwmac->clk)) + if (IS_ERR(dwmac->clk)) { + dev_warn(dev, "No phy clock provided...\n"); dwmac->clk = NULL; - - return 0; -} - -static int sti_dwmac_init(struct platform_device *pdev, void *priv) -{ - struct sti_dwmac *dwmac = priv; - struct regmap *regmap = dwmac->regmap; - int iface = dwmac->interface; - u32 reg = dwmac->reg; - u32 val, spd; - - if (dwmac->clk) - clk_prepare_enable(dwmac->clk); - - regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); - - val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; - regmap_update_bits(regmap, reg, ENMII_MASK, val); - - if (IS_PHY_IF_MODE_GBIT(iface)) - spd = SPEED_1000; - else - spd = SPEED_100; - - setup_retime_src(dwmac, spd); + } return 0; } @@ -322,9 +351,16 @@ static void *sti_dwmac_setup(struct platform_device *pdev) return dwmac; } -const struct stmmac_of_data sti_gmac_data = { - .fix_mac_speed = sti_fix_mac_speed, +const struct stmmac_of_data stih4xx_dwmac_data = { + .fix_mac_speed = stih4xx_fix_retime_src, + .setup = sti_dwmac_setup, + .init = stix4xx_init, + .exit = sti_dwmac_exit, +}; + +const struct stmmac_of_data stid127_dwmac_data = { + .fix_mac_speed = stid127_fix_retime_src, .setup = sti_dwmac_setup, - .init = sti_dwmac_init, + .init = stid127_init, .exit = sti_dwmac_exit, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 44528896355..c3c40650b30 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -144,7 +144,8 @@ extern const struct stmmac_of_data meson6_dwmac_data; extern const struct stmmac_of_data sun7i_gmac_data; #endif #ifdef CONFIG_DWMAC_STI -extern const struct stmmac_of_data sti_gmac_data; +extern const struct stmmac_of_data stih4xx_dwmac_data; +extern const struct stmmac_of_data stid127_dwmac_data; #endif #ifdef CONFIG_DWMAC_SOCFPGA extern const struct stmmac_of_data socfpga_gmac_data; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 65217170625..db56fa7ce8f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -37,9 +37,10 @@ static const struct of_device_id stmmac_dt_ids[] = { { .compatible = "allwinner,sun7i-a20-gmac", .data = &sun7i_gmac_data}, #endif #ifdef CONFIG_DWMAC_STI - { .compatible = "st,stih415-dwmac", .data = &sti_gmac_data}, - { .compatible = "st,stih416-dwmac", .data = &sti_gmac_data}, - { .compatible = "st,stid127-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data}, + { .compatible = "st,stih416-dwmac", .data = &stih4xx_dwmac_data}, + { .compatible = "st,stid127-dwmac", .data = &stid127_dwmac_data}, + { .compatible = "st,stih407-dwmac", .data = &stih4xx_dwmac_data}, #endif #ifdef CONFIG_DWMAC_SOCFPGA { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gmac_data }, @@ -160,11 +161,16 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - plat->mdio_bus_data = devm_kzalloc(&pdev->dev, - sizeof(struct stmmac_mdio_bus_data), - GFP_KERNEL); + if (plat->phy_bus_name) + plat->mdio_bus_data = NULL; + else + plat->mdio_bus_data = + devm_kzalloc(&pdev->dev, + sizeof(struct stmmac_mdio_bus_data), + GFP_KERNEL); - plat->force_sf_dma_mode = of_property_read_bool(np, "snps,force_sf_dma_mode"); + plat->force_sf_dma_mode = + of_property_read_bool(np, "snps,force_sf_dma_mode"); /* Set the maxmtu to a default of JUMBO_LEN in case the * parameter is not present in the device tree. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ab167dc49ce..952e1e4764b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2392,6 +2392,15 @@ clean_ndev_ret: return ret; } +static int cpsw_remove_child_device(struct device *dev, void *c) +{ + struct platform_device *pdev = to_platform_device(dev); + + of_device_unregister(pdev); + + return 0; +} + static int cpsw_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); @@ -2406,6 +2415,7 @@ static int cpsw_remove(struct platform_device *pdev) cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); pm_runtime_disable(&pdev->dev); + device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device); if (priv->data.dual_emac) free_netdev(cpsw_get_slave_ndev(priv, 1)); free_netdev(ndev); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 4a000f6dd6f..657b65bf5ca 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -193,12 +193,9 @@ fail: static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) { - unsigned long flags; - if (!pool) return; - spin_lock_irqsave(&pool->lock, flags); WARN_ON(pool->used_desc); if (pool->cpumap) { dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap, @@ -206,7 +203,6 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) } else { iounmap(pool->iomap); } - spin_unlock_irqrestore(&pool->lock, flags); } static inline dma_addr_t desc_phys(struct cpdma_desc_pool *pool, @@ -561,7 +557,6 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) cpdma_chan_stop(chan); ctlr->channels[chan->chan_num] = NULL; spin_unlock_irqrestore(&ctlr->lock, flags); - kfree(chan); return 0; } EXPORT_SYMBOL_GPL(cpdma_chan_destroy); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0fcb5e7eb07..9e17d1a91e7 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -162,7 +162,7 @@ union sub_key { * data: network byte order * return: host byte order */ -static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) +static u32 comp_hash(u8 *key, int klen, void *data, int dlen) { union sub_key subk; int k_next = 4; @@ -176,7 +176,7 @@ static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) for (i = 0; i < dlen; i++) { subk.kb = key[k_next]; k_next = (k_next + 1) % klen; - dt = data[i]; + dt = ((u8 *)data)[i]; for (j = 0; j < 8; j++) { if (dt & 0x80) ret ^= subk.ka; @@ -190,26 +190,20 @@ static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) { - struct iphdr *iphdr; + struct flow_keys flow; int data_len; - bool ret = false; - if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) + if (!skb_flow_dissect(skb, &flow) || flow.n_proto != htons(ETH_P_IP)) return false; - iphdr = ip_hdr(skb); + if (flow.ip_proto == IPPROTO_TCP) + data_len = 12; + else + data_len = 8; - if (iphdr->version == 4) { - if (iphdr->protocol == IPPROTO_TCP) - data_len = 12; - else - data_len = 8; - *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, - (u8 *)&iphdr->saddr, data_len); - ret = true; - } + *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len); - return ret; + return true; } static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0c6adaaf898..65e2892342b 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -298,7 +298,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) */ if (q->flags & IFF_VNET_HDR) features |= vlan->tap_features; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); if (IS_ERR(segs)) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 492435fce1d..8c2a29a9bd7 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -198,8 +198,10 @@ static int ksz8021_config_init(struct phy_device *phydev) if (rc) dev_err(&phydev->dev, "failed to set led mode\n"); - phy_write(phydev, MII_KSZPHY_OMSO, val); rc = ksz_config_flags(phydev); + if (rc < 0) + return rc; + rc = phy_write(phydev, MII_KSZPHY_OMSO, val); return rc < 0 ? rc : 0; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 864159eb744..e3d84c322e4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3189,31 +3189,39 @@ static void r8153_init(struct r8152 *tp) static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) { struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev = tp->netdev; + int ret = 0; mutex_lock(&tp->control); - if (PMSG_IS_AUTO(message)) + if (PMSG_IS_AUTO(message)) { + if (netif_running(netdev) && work_busy(&tp->schedule.work)) { + ret = -EBUSY; + goto out1; + } + set_bit(SELECTIVE_SUSPEND, &tp->flags); - else - netif_device_detach(tp->netdev); + } else { + netif_device_detach(netdev); + } - if (netif_running(tp->netdev)) { + if (netif_running(netdev)) { clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); - cancel_delayed_work_sync(&tp->schedule); tasklet_disable(&tp->tl); if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_stop_rx(tp); rtl_runtime_suspend_enable(tp, true); } else { + cancel_delayed_work_sync(&tp->schedule); tp->rtl_ops.down(tp); } tasklet_enable(&tp->tl); } - +out1: mutex_unlock(&tp->control); - return 0; + return ret; } static int rtl8152_resume(struct usb_interface *intf) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3d0ce4468ce..d75256bd1a6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -123,9 +123,6 @@ struct virtnet_info { /* Host can handle any s/g split between our header and packet data */ bool any_header_sg; - /* enable config space updates */ - bool config_enable; - /* Active statistics */ struct virtnet_stats __percpu *stats; @@ -135,9 +132,6 @@ struct virtnet_info { /* Work struct for config space updates */ struct work_struct config_work; - /* Lock for config space updates */ - struct mutex config_lock; - /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; @@ -920,6 +914,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) int qnum = skb_get_queue_mapping(skb); struct send_queue *sq = &vi->sq[qnum]; int err; + struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); + bool kick = !skb->xmit_more; /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(sq); @@ -956,7 +952,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (__netif_subqueue_stopped(dev, qnum) || !skb->xmit_more) + if (kick || netif_xmit_stopped(txq)) virtqueue_kick(sq->vq); return NETDEV_TX_OK; @@ -1412,13 +1408,9 @@ static void virtnet_config_changed_work(struct work_struct *work) container_of(work, struct virtnet_info, config_work); u16 v; - mutex_lock(&vi->config_lock); - if (!vi->config_enable) - goto done; - if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, struct virtio_net_config, status, &v) < 0) - goto done; + return; if (v & VIRTIO_NET_S_ANNOUNCE) { netdev_notify_peers(vi->dev); @@ -1429,7 +1421,7 @@ static void virtnet_config_changed_work(struct work_struct *work) v &= VIRTIO_NET_S_LINK_UP; if (vi->status == v) - goto done; + return; vi->status = v; @@ -1440,8 +1432,6 @@ static void virtnet_config_changed_work(struct work_struct *work) netif_carrier_off(vi->dev); netif_tx_stop_all_queues(vi->dev); } -done: - mutex_unlock(&vi->config_lock); } static void virtnet_config_changed(struct virtio_device *vdev) @@ -1762,8 +1752,6 @@ static int virtnet_probe(struct virtio_device *vdev) u64_stats_init(&virtnet_stats->rx_syncp); } - mutex_init(&vi->config_lock); - vi->config_enable = true; INIT_WORK(&vi->config_work, virtnet_config_changed_work); /* If we can receive ANY GSO packets, we must allocate large ones. */ @@ -1811,6 +1799,8 @@ static int virtnet_probe(struct virtio_device *vdev) goto free_vqs; } + virtio_device_ready(vdev); + /* Last of all, set up some receive buffers. */ for (i = 0; i < vi->curr_queue_pairs; i++) { try_fill_recv(&vi->rq[i], GFP_KERNEL); @@ -1847,6 +1837,8 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_recv_bufs: + vi->vdev->config->reset(vdev); + free_receive_bufs(vi); unregister_netdev(dev); free_vqs: @@ -1880,17 +1872,13 @@ static void virtnet_remove(struct virtio_device *vdev) unregister_hotcpu_notifier(&vi->nb); - /* Prevent config work handler from accessing the device. */ - mutex_lock(&vi->config_lock); - vi->config_enable = false; - mutex_unlock(&vi->config_lock); + /* Make sure no work handler is accessing the device. */ + flush_work(&vi->config_work); unregister_netdev(vi->dev); remove_vq_common(vi); - flush_work(&vi->config_work); - free_percpu(vi->stats); free_netdev(vi->dev); } @@ -1903,10 +1891,8 @@ static int virtnet_freeze(struct virtio_device *vdev) unregister_hotcpu_notifier(&vi->nb); - /* Prevent config work handler from accessing the device */ - mutex_lock(&vi->config_lock); - vi->config_enable = false; - mutex_unlock(&vi->config_lock); + /* Make sure no work handler is accessing the device */ + flush_work(&vi->config_work); netif_device_detach(vi->dev); cancel_delayed_work_sync(&vi->refill); @@ -1921,8 +1907,6 @@ static int virtnet_freeze(struct virtio_device *vdev) remove_vq_common(vi); - flush_work(&vi->config_work); - return 0; } @@ -1935,6 +1919,8 @@ static int virtnet_restore(struct virtio_device *vdev) if (err) return err; + virtio_device_ready(vdev); + if (netif_running(vi->dev)) { for (i = 0; i < vi->curr_queue_pairs; i++) if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) @@ -1946,10 +1932,6 @@ static int virtnet_restore(struct virtio_device *vdev) netif_device_attach(vi->dev); - mutex_lock(&vi->config_lock); - vi->config_enable = true; - mutex_unlock(&vi->config_lock); - rtnl_lock(); virtnet_set_queues(vi, vi->curr_queue_pairs); rtnl_unlock(); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2a51e6e48e1..ca309820d39 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1437,9 +1437,6 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) if (!in6_dev) goto out; - if (!pskb_may_pull(skb, skb->len)) - goto out; - iphdr = ipv6_hdr(skb); saddr = &iphdr->saddr; daddr = &iphdr->daddr; @@ -1668,6 +1665,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; + struct net_device *dev = skb->dev; + int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats); @@ -1691,16 +1690,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; - tx_stats->tx_bytes += skb->len; + tx_stats->tx_bytes += len; u64_stats_update_end(&tx_stats->syncp); if (netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; + rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { - skb->dev->stats.rx_dropped++; + dev->stats.rx_dropped++; } } @@ -1878,7 +1877,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) return arp_reduce(dev, skb); #if IS_ENABLED(CONFIG_IPV6) else if (ntohs(eth->h_proto) == ETH_P_IPV6 && - skb->len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { struct nd_msg *msg; @@ -1887,6 +1887,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) return neigh_reduce(dev, skb); } + eth = eth_hdr(skb); #endif } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index fa671442f42..cca871346a0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -638,7 +638,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!netif_carrier_ok(dev) || (slots > 1 && !xennet_can_sg(dev)) || - netif_needs_gso(skb, netif_skb_features(skb)))) { + netif_needs_gso(dev, skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&queue->tx_lock, flags); goto drop; } diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 76bed1743db..56046ab3962 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -386,6 +386,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */ blk_queue_max_segments(rq, nr_max_blk); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq); scm_blk_dev_cluster_setup(bdev); bdev->gendisk = alloc_disk(SCM_NR_PARTS); diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 6969d39f1e2..9e0de9c9a6f 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -346,6 +346,7 @@ static int __init xpram_setup_blkdev(void) goto out; } queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xpram_queues[i]); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); blk_queue_make_request(xpram_queues[i], xpram_make_request); blk_queue_logical_block_size(xpram_queues[i], 4096); } diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index a1349653c6d..643129070c5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -406,15 +406,8 @@ static void kvm_extint_handler(struct ext_code ext_code, switch (param) { case VIRTIO_PARAM_CONFIG_CHANGED: - { - struct virtio_driver *drv; - drv = container_of(vq->vdev->dev.driver, - struct virtio_driver, driver); - if (drv->config_changed) - drv->config_changed(vq->vdev); - + virtio_config_changed(vq->vdev); break; - } case VIRTIO_PARAM_DEV_ADD: schedule_work(&hotplug_work); break; diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index d2c0b442bce..6cbe6ef3c88 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -940,11 +940,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vring_interrupt(0, vq); } if (test_bit(0, &vcdev->indicators2)) { - drv = container_of(vcdev->vdev.dev.driver, - struct virtio_driver, driver); - - if (drv && drv->config_changed) - drv->config_changed(&vcdev->vdev); + virtio_config_changed(&vcdev->vdev); clear_bit(0, &vcdev->indicators2); } } diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 296619b7426..3a820f61ce6 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -73,7 +73,6 @@ comment "SCSI support type (disk, tape, CD-ROM)" config BLK_DEV_SD tristate "SCSI disk support" depends on SCSI - select CRC_T10DIF if BLK_DEV_INTEGRITY ---help--- If you want to use SCSI hard disks, Fibre Channel disks, Serial ATA (SATA) or Parallel ATA (PATA) hard disks, diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 02e69e7ee4a..3e0a0d315f7 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -259,6 +259,7 @@ static void send_act_open_req(struct cxgbi_sock *csk, struct sk_buff *skb, cxgb4_l2t_send(csk->cdev->ports[csk->port_id], skb, csk->l2t); } +#if IS_ENABLED(CONFIG_IPV6) static void send_act_open_req6(struct cxgbi_sock *csk, struct sk_buff *skb, struct l2t_entry *e) { @@ -344,6 +345,7 @@ static void send_act_open_req6(struct cxgbi_sock *csk, struct sk_buff *skb, cxgb4_l2t_send(csk->cdev->ports[csk->port_id], skb, csk->l2t); } +#endif static void send_close_req(struct cxgbi_sock *csk) { @@ -756,7 +758,7 @@ static int act_open_rpl_status_to_errno(int status) static void csk_act_open_retry_timer(unsigned long data) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct cxgbi_sock *csk = (struct cxgbi_sock *)data; struct cxgb4_lld_info *lldi = cxgbi_cdev_priv(csk->cdev); void (*send_act_open_func)(struct cxgbi_sock *, struct sk_buff *, @@ -781,9 +783,11 @@ static void csk_act_open_retry_timer(unsigned long data) if (csk->csk_family == AF_INET) { send_act_open_func = send_act_open_req; skb = alloc_wr(size, 0, GFP_ATOMIC); +#if IS_ENABLED(CONFIG_IPV6) } else { send_act_open_func = send_act_open_req6; skb = alloc_wr(size6, 0, GFP_ATOMIC); +#endif } if (!skb) @@ -1313,11 +1317,6 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_flag(csk, CTPF_HAS_ATID); cxgbi_sock_get(csk); - n = dst_neigh_lookup(csk->dst, &csk->daddr.sin_addr.s_addr); - if (!n) { - pr_err("%s, can't get neighbour of csk->dst.\n", ndev->name); - goto rel_resource; - } csk->l2t = cxgb4_l2t_get(lldi->l2t, n, ndev, 0); if (!csk->l2t) { pr_err("%s, cannot alloc l2t.\n", ndev->name); @@ -1335,8 +1334,10 @@ static int init_act_open(struct cxgbi_sock *csk) if (csk->csk_family == AF_INET) skb = alloc_wr(size, 0, GFP_NOIO); +#if IS_ENABLED(CONFIG_IPV6) else skb = alloc_wr(size6, 0, GFP_NOIO); +#endif if (!skb) goto rel_resource; @@ -1370,8 +1371,10 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_state(csk, CTP_ACTIVE_OPEN); if (csk->csk_family == AF_INET) send_act_open_req(csk, skb, csk->l2t); +#if IS_ENABLED(CONFIG_IPV6) else send_act_open_req6(csk, skb, csk->l2t); +#endif neigh_release(n); return 0; @@ -1635,129 +1638,6 @@ static int cxgb4i_ddp_init(struct cxgbi_device *cdev) return 0; } -#if IS_ENABLED(CONFIG_IPV6) -static int cxgbi_inet6addr_handler(struct notifier_block *this, - unsigned long event, void *data) -{ - struct inet6_ifaddr *ifa = data; - struct net_device *event_dev = ifa->idev->dev; - struct cxgbi_device *cdev; - int ret = NOTIFY_DONE; - - if (event_dev->priv_flags & IFF_802_1Q_VLAN) - event_dev = vlan_dev_real_dev(event_dev); - - cdev = cxgbi_device_find_by_netdev_rcu(event_dev, NULL); - - if (!cdev) - return ret; - - switch (event) { - case NETDEV_UP: - ret = cxgb4_clip_get(event_dev, - (const struct in6_addr *) - ((ifa)->addr.s6_addr)); - if (ret < 0) - return ret; - - ret = NOTIFY_OK; - break; - - case NETDEV_DOWN: - cxgb4_clip_release(event_dev, - (const struct in6_addr *) - ((ifa)->addr.s6_addr)); - ret = NOTIFY_OK; - break; - - default: - break; - } - - return ret; -} - -static struct notifier_block cxgbi_inet6addr_notifier = { - .notifier_call = cxgbi_inet6addr_handler -}; - -/* Retrieve IPv6 addresses from a root device (bond, vlan) associated with - * a physical device. - * The physical device reference is needed to send the actual CLIP command. - */ -static int update_dev_clip(struct net_device *root_dev, struct net_device *dev) -{ - struct inet6_dev *idev = NULL; - struct inet6_ifaddr *ifa; - int ret = 0; - - idev = __in6_dev_get(root_dev); - if (!idev) - return ret; - - read_lock_bh(&idev->lock); - list_for_each_entry(ifa, &idev->addr_list, if_list) { - pr_info("updating the clip for addr %pI6\n", - ifa->addr.s6_addr); - ret = cxgb4_clip_get(dev, (const struct in6_addr *) - ifa->addr.s6_addr); - if (ret < 0) - break; - } - - read_unlock_bh(&idev->lock); - return ret; -} - -static int update_root_dev_clip(struct net_device *dev) -{ - struct net_device *root_dev = NULL; - int i, ret = 0; - - /* First populate the real net device's IPv6 address */ - ret = update_dev_clip(dev, dev); - if (ret) - return ret; - - /* Parse all bond and vlan devices layered on top of the physical dev */ - root_dev = netdev_master_upper_dev_get(dev); - if (root_dev) { - ret = update_dev_clip(root_dev, dev); - if (ret) - return ret; - } - - for (i = 0; i < VLAN_N_VID; i++) { - root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i); - if (!root_dev) - continue; - - ret = update_dev_clip(root_dev, dev); - if (ret) - break; - } - return ret; -} - -static void cxgbi_update_clip(struct cxgbi_device *cdev) -{ - int i; - - rcu_read_lock(); - - for (i = 0; i < cdev->nports; i++) { - struct net_device *dev = cdev->ports[i]; - int ret = 0; - - if (dev) - ret = update_root_dev_clip(dev); - if (ret < 0) - break; - } - rcu_read_unlock(); -} -#endif /* IS_ENABLED(CONFIG_IPV6) */ - static void *t4_uld_add(const struct cxgb4_lld_info *lldi) { struct cxgbi_device *cdev; @@ -1876,10 +1756,6 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) switch (state) { case CXGB4_STATE_UP: pr_info("cdev 0x%p, UP.\n", cdev); -#if IS_ENABLED(CONFIG_IPV6) - cxgbi_update_clip(cdev); -#endif - /* re-initialize */ break; case CXGB4_STATE_START_RECOVERY: pr_info("cdev 0x%p, RECOVERY.\n", cdev); @@ -1910,17 +1786,11 @@ static int __init cxgb4i_init_module(void) return rc; cxgb4_register_uld(CXGB4_ULD_ISCSI, &cxgb4i_uld_info); -#if IS_ENABLED(CONFIG_IPV6) - register_inet6addr_notifier(&cxgbi_inet6addr_notifier); -#endif return 0; } static void __exit cxgb4i_exit_module(void) { -#if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&cxgbi_inet6addr_notifier); -#endif cxgb4_unregister_uld(CXGB4_ULD_ISCSI); cxgbi_device_unregister_all(CXGBI_FLAG_DEV_T4); cxgbi_iscsi_cleanup(&cxgb4i_iscsi_transport, &cxgb4i_stt); diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 6a2001d6b44..54fa6e0bc1b 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -275,6 +275,7 @@ struct cxgbi_device *cxgbi_device_find_by_netdev_rcu(struct net_device *ndev, } EXPORT_SYMBOL_GPL(cxgbi_device_find_by_netdev_rcu); +#if IS_ENABLED(CONFIG_IPV6) static struct cxgbi_device *cxgbi_device_find_by_mac(struct net_device *ndev, int *port) { @@ -307,6 +308,7 @@ static struct cxgbi_device *cxgbi_device_find_by_mac(struct net_device *ndev, ndev, ndev->name); return NULL; } +#endif void cxgbi_hbas_remove(struct cxgbi_device *cdev) { diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 7bcf67eec92..e99507ed0e3 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -115,7 +115,7 @@ static struct request *get_alua_req(struct scsi_device *sdev, rq = blk_get_request(q, rw, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "%s: blk_get_request failed\n", __func__); return NULL; diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 6f07f7fe3aa..84765384c47 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -275,7 +275,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd, rq = blk_get_request(sdev->request_queue, (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed"); return NULL; } diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index e9d9fea9e27..4ee2759f529 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -117,7 +117,7 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) retry: req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); - if (!req) + if (IS_ERR(req)) return SCSI_DH_RES_TEMP_UNAVAIL; blk_rq_set_block_pc(req); @@ -247,7 +247,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) struct request *req; req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC); - if (!req) + if (IS_ERR(req)) return SCSI_DH_RES_TEMP_UNAVAIL; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 826069db984..1b5bc9293e3 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -274,7 +274,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev, rq = blk_get_request(q, rw, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "get_rdac_req: blk_get_request failed.\n"); return NULL; diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 5f4cbf0c475..fd19fd8468a 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1567,8 +1567,8 @@ static struct request *_make_request(struct request_queue *q, bool has_write, struct request *req; req = blk_get_request(q, has_write ? WRITE : READ, flags); - if (unlikely(!req)) - return ERR_PTR(-ENOMEM); + if (IS_ERR(req)) + return req; blk_rq_set_block_pc(req); return req; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 0727ea7cc38..dff37a250d7 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -362,7 +362,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, int write = (data_direction == DMA_TO_DEVICE); req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); - if (!req) + if (IS_ERR(req)) return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6b20ef3fee5..9a6f8468225 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1961,6 +1961,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) * request becomes available */ req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); + if (IS_ERR(req)) + return; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index db8c449282f..9eff8a37513 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -221,7 +221,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int ret = DRIVER_ERROR << 24; req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); - if (!req) + if (IS_ERR(req)) return ret; blk_rq_set_block_pc(req); @@ -715,7 +715,7 @@ static bool scsi_end_request(struct request *req, int error, if (req->mq_ctx) { /* - * In the MQ case the command gets freed by __blk_mq_end_io, + * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. * * We also can't kick the queues from irq context, so we @@ -723,7 +723,7 @@ static bool scsi_end_request(struct request *req, int error, */ scsi_mq_uninit_cmd(cmd); - __blk_mq_end_io(req, error); + __blk_mq_end_request(req, error); if (scsi_target(sdev)->single_lun || !list_empty(&sdev->host->starved_list)) @@ -1847,6 +1847,8 @@ static int scsi_mq_prep_fn(struct request *req) next_rq->special = bidi_sdb; } + blk_mq_start_request(req); + return scsi_setup_cmnd(sdev, req); } @@ -1856,7 +1858,8 @@ static void scsi_mq_done(struct scsi_cmnd *cmd) blk_mq_complete_request(cmd->request); } -static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) +static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, + bool last) { struct request_queue *q = req->q; struct scsi_device *sdev = q->queuedata; @@ -1880,11 +1883,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) if (!scsi_host_queue_ready(q, shost, sdev)) goto out_dec_target_busy; + if (!(req->cmd_flags & REQ_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); if (ret) goto out_dec_host_busy; req->cmd_flags |= REQ_DONTPREP; + } else { + blk_mq_start_request(req); } scsi_init_cmd_errh(cmd); @@ -1931,6 +1937,14 @@ out: return ret; } +static enum blk_eh_timer_return scsi_timeout(struct request *req, + bool reserved) +{ + if (reserved) + return BLK_EH_RESET_TIMER; + return scsi_times_out(req); +} + static int scsi_init_request(void *data, struct request *rq, unsigned int hctx_idx, unsigned int request_idx, unsigned int numa_node) @@ -2042,7 +2056,7 @@ static struct blk_mq_ops scsi_mq_ops = { .map_queue = blk_mq_map_queue, .queue_rq = scsi_queue_rq, .complete = scsi_softirq_done, - .timeout = scsi_times_out, + .timeout = scsi_timeout, .init_request = scsi_init_request, .exit_request = scsi_exit_request, }; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0cb5c9f0c74..cfba74cd8e8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -610,29 +610,44 @@ static void scsi_disk_put(struct scsi_disk *sdkp) mutex_unlock(&sd_ref_mutex); } -static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) -{ - unsigned int prot_op = SCSI_PROT_NORMAL; - unsigned int dix = scsi_prot_sg_count(scmd); - - if (scmd->sc_data_direction == DMA_FROM_DEVICE) { - if (dif && dix) - prot_op = SCSI_PROT_READ_PASS; - else if (dif && !dix) - prot_op = SCSI_PROT_READ_STRIP; - else if (!dif && dix) - prot_op = SCSI_PROT_READ_INSERT; - } else { - if (dif && dix) - prot_op = SCSI_PROT_WRITE_PASS; - else if (dif && !dix) - prot_op = SCSI_PROT_WRITE_INSERT; - else if (!dif && dix) - prot_op = SCSI_PROT_WRITE_STRIP; + + +static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, + unsigned int dix, unsigned int dif) +{ + struct bio *bio = scmd->request->bio; + unsigned int prot_op = sd_prot_op(rq_data_dir(scmd->request), dix, dif); + unsigned int protect = 0; + + if (dix) { /* DIX Type 0, 1, 2, 3 */ + if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM)) + scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM; + + if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false) + scmd->prot_flags |= SCSI_PROT_GUARD_CHECK; + } + + if (dif != SD_DIF_TYPE3_PROTECTION) { /* DIX/DIF Type 0, 1, 2 */ + scmd->prot_flags |= SCSI_PROT_REF_INCREMENT; + + if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false) + scmd->prot_flags |= SCSI_PROT_REF_CHECK; + } + + if (dif) { /* DIX/DIF Type 1, 2, 3 */ + scmd->prot_flags |= SCSI_PROT_TRANSFER_PI; + + if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK)) + protect = 3 << 5; /* Disable target PI checking */ + else + protect = 1 << 5; /* Enable target PI checking */ } scsi_set_prot_op(scmd, prot_op); scsi_set_prot_type(scmd, dif); + scmd->prot_flags &= sd_prot_flag_mask(prot_op); + + return protect; } static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) @@ -893,7 +908,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) sector_t block = blk_rq_pos(rq); sector_t threshold; unsigned int this_count = blk_rq_sectors(rq); - int ret, host_dif; + unsigned int dif, dix; + int ret; unsigned char protect; ret = scsi_init_io(SCpnt, GFP_ATOMIC); @@ -995,7 +1011,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) SCpnt->cmnd[0] = WRITE_6; if (blk_integrity_rq(rq)) - sd_dif_prepare(rq, block, sdp->sector_size); + sd_dif_prepare(SCpnt); } else if (rq_data_dir(rq) == READ) { SCpnt->cmnd[0] = READ_6; @@ -1010,14 +1026,15 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) "writing" : "reading", this_count, blk_rq_sectors(rq))); - /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ - host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); - if (host_dif) - protect = 1 << 5; + dix = scsi_prot_sg_count(SCpnt); + dif = scsi_host_dif_capable(SCpnt->device->host, sdkp->protection_type); + + if (dif || dix) + protect = sd_setup_protect_cmnd(SCpnt, dix, dif); else protect = 0; - if (host_dif == SD_DIF_TYPE2_PROTECTION) { + if (protect && sdkp->protection_type == SD_DIF_TYPE2_PROTECTION) { SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); if (unlikely(SCpnt->cmnd == NULL)) { @@ -1102,10 +1119,6 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) } SCpnt->sdb.length = this_count * sdp->sector_size; - /* If DIF or DIX is enabled, tell HBA how to handle request */ - if (host_dif || scsi_prot_sg_count(SCpnt)) - sd_prot_op(SCpnt, host_dif); - /* * We shouldn't disconnect in the middle of a sector, so with a dumb * host adapter, it's safe to assume that we can at least transfer @@ -2664,8 +2677,10 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) rot = get_unaligned_be16(&buffer[4]); - if (rot == 1) + if (rot == 1) { queue_flag_set_unlocked(QUEUE_FLAG_NONROT, sdkp->disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, sdkp->disk->queue); + } out: kfree(buffer); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 4c3ab8377fd..467377884b6 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -167,6 +167,68 @@ enum sd_dif_target_protection_types { }; /* + * Look up the DIX operation based on whether the command is read or + * write and whether dix and dif are enabled. + */ +static inline unsigned int sd_prot_op(bool write, bool dix, bool dif) +{ + /* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */ + const unsigned int ops[] = { /* wrt dix dif */ + SCSI_PROT_NORMAL, /* 0 0 0 */ + SCSI_PROT_READ_STRIP, /* 0 0 1 */ + SCSI_PROT_READ_INSERT, /* 0 1 0 */ + SCSI_PROT_READ_PASS, /* 0 1 1 */ + SCSI_PROT_NORMAL, /* 1 0 0 */ + SCSI_PROT_WRITE_INSERT, /* 1 0 1 */ + SCSI_PROT_WRITE_STRIP, /* 1 1 0 */ + SCSI_PROT_WRITE_PASS, /* 1 1 1 */ + }; + + return ops[write << 2 | dix << 1 | dif]; +} + +/* + * Returns a mask of the protection flags that are valid for a given DIX + * operation. + */ +static inline unsigned int sd_prot_flag_mask(unsigned int prot_op) +{ + const unsigned int flag_mask[] = { + [SCSI_PROT_NORMAL] = 0, + + [SCSI_PROT_READ_STRIP] = SCSI_PROT_TRANSFER_PI | + SCSI_PROT_GUARD_CHECK | + SCSI_PROT_REF_CHECK | + SCSI_PROT_REF_INCREMENT, + + [SCSI_PROT_READ_INSERT] = SCSI_PROT_REF_INCREMENT | + SCSI_PROT_IP_CHECKSUM, + + [SCSI_PROT_READ_PASS] = SCSI_PROT_TRANSFER_PI | + SCSI_PROT_GUARD_CHECK | + SCSI_PROT_REF_CHECK | + SCSI_PROT_REF_INCREMENT | + SCSI_PROT_IP_CHECKSUM, + + [SCSI_PROT_WRITE_INSERT] = SCSI_PROT_TRANSFER_PI | + SCSI_PROT_REF_INCREMENT, + + [SCSI_PROT_WRITE_STRIP] = SCSI_PROT_GUARD_CHECK | + SCSI_PROT_REF_CHECK | + SCSI_PROT_REF_INCREMENT | + SCSI_PROT_IP_CHECKSUM, + + [SCSI_PROT_WRITE_PASS] = SCSI_PROT_TRANSFER_PI | + SCSI_PROT_GUARD_CHECK | + SCSI_PROT_REF_CHECK | + SCSI_PROT_REF_INCREMENT | + SCSI_PROT_IP_CHECKSUM, + }; + + return flag_mask[prot_op]; +} + +/* * Data Integrity Field tuple. */ struct sd_dif_tuple { @@ -178,7 +240,7 @@ struct sd_dif_tuple { #ifdef CONFIG_BLK_DEV_INTEGRITY extern void sd_dif_config_host(struct scsi_disk *); -extern void sd_dif_prepare(struct request *rq, sector_t, unsigned int); +extern void sd_dif_prepare(struct scsi_cmnd *scmd); extern void sd_dif_complete(struct scsi_cmnd *, unsigned int); #else /* CONFIG_BLK_DEV_INTEGRITY */ @@ -186,7 +248,7 @@ extern void sd_dif_complete(struct scsi_cmnd *, unsigned int); static inline void sd_dif_config_host(struct scsi_disk *disk) { } -static inline int sd_dif_prepare(struct request *rq, sector_t s, unsigned int a) +static inline int sd_dif_prepare(struct scsi_cmnd *scmd) { return 0; } diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index a7a691d0af7..14c7d42a11c 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -21,7 +21,7 @@ */ #include <linux/blkdev.h> -#include <linux/crc-t10dif.h> +#include <linux/t10-pi.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -33,268 +33,8 @@ #include <scsi/scsi_ioctl.h> #include <scsi/scsicam.h> -#include <net/checksum.h> - #include "sd.h" -typedef __u16 (csum_fn) (void *, unsigned int); - -static __u16 sd_dif_crc_fn(void *data, unsigned int len) -{ - return cpu_to_be16(crc_t10dif(data, len)); -} - -static __u16 sd_dif_ip_fn(void *data, unsigned int len) -{ - return ip_compute_csum(data, len); -} - -/* - * Type 1 and Type 2 protection use the same format: 16 bit guard tag, - * 16 bit app tag, 32 bit reference tag. - */ -static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn) -{ - void *buf = bix->data_buf; - struct sd_dif_tuple *sdt = bix->prot_buf; - sector_t sector = bix->sector; - unsigned int i; - - for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - sdt->guard_tag = fn(buf, bix->sector_size); - sdt->ref_tag = cpu_to_be32(sector & 0xffffffff); - sdt->app_tag = 0; - - buf += bix->sector_size; - sector++; - } -} - -static void sd_dif_type1_generate_crc(struct blk_integrity_exchg *bix) -{ - sd_dif_type1_generate(bix, sd_dif_crc_fn); -} - -static void sd_dif_type1_generate_ip(struct blk_integrity_exchg *bix) -{ - sd_dif_type1_generate(bix, sd_dif_ip_fn); -} - -static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn) -{ - void *buf = bix->data_buf; - struct sd_dif_tuple *sdt = bix->prot_buf; - sector_t sector = bix->sector; - unsigned int i; - __u16 csum; - - for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - /* Unwritten sectors */ - if (sdt->app_tag == 0xffff) - return 0; - - if (be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) { - printk(KERN_ERR - "%s: ref tag error on sector %lu (rcvd %u)\n", - bix->disk_name, (unsigned long)sector, - be32_to_cpu(sdt->ref_tag)); - return -EIO; - } - - csum = fn(buf, bix->sector_size); - - if (sdt->guard_tag != csum) { - printk(KERN_ERR "%s: guard tag error on sector %lu " \ - "(rcvd %04x, data %04x)\n", bix->disk_name, - (unsigned long)sector, - be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum)); - return -EIO; - } - - buf += bix->sector_size; - sector++; - } - - return 0; -} - -static int sd_dif_type1_verify_crc(struct blk_integrity_exchg *bix) -{ - return sd_dif_type1_verify(bix, sd_dif_crc_fn); -} - -static int sd_dif_type1_verify_ip(struct blk_integrity_exchg *bix) -{ - return sd_dif_type1_verify(bix, sd_dif_ip_fn); -} - -/* - * Functions for interleaving and deinterleaving application tags - */ -static void sd_dif_type1_set_tag(void *prot, void *tag_buf, unsigned int sectors) -{ - struct sd_dif_tuple *sdt = prot; - u8 *tag = tag_buf; - unsigned int i, j; - - for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) { - sdt->app_tag = tag[j] << 8 | tag[j+1]; - BUG_ON(sdt->app_tag == 0xffff); - } -} - -static void sd_dif_type1_get_tag(void *prot, void *tag_buf, unsigned int sectors) -{ - struct sd_dif_tuple *sdt = prot; - u8 *tag = tag_buf; - unsigned int i, j; - - for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) { - tag[j] = (sdt->app_tag & 0xff00) >> 8; - tag[j+1] = sdt->app_tag & 0xff; - } -} - -static struct blk_integrity dif_type1_integrity_crc = { - .name = "T10-DIF-TYPE1-CRC", - .generate_fn = sd_dif_type1_generate_crc, - .verify_fn = sd_dif_type1_verify_crc, - .get_tag_fn = sd_dif_type1_get_tag, - .set_tag_fn = sd_dif_type1_set_tag, - .tuple_size = sizeof(struct sd_dif_tuple), - .tag_size = 0, -}; - -static struct blk_integrity dif_type1_integrity_ip = { - .name = "T10-DIF-TYPE1-IP", - .generate_fn = sd_dif_type1_generate_ip, - .verify_fn = sd_dif_type1_verify_ip, - .get_tag_fn = sd_dif_type1_get_tag, - .set_tag_fn = sd_dif_type1_set_tag, - .tuple_size = sizeof(struct sd_dif_tuple), - .tag_size = 0, -}; - - -/* - * Type 3 protection has a 16-bit guard tag and 16 + 32 bits of opaque - * tag space. - */ -static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn) -{ - void *buf = bix->data_buf; - struct sd_dif_tuple *sdt = bix->prot_buf; - unsigned int i; - - for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - sdt->guard_tag = fn(buf, bix->sector_size); - sdt->ref_tag = 0; - sdt->app_tag = 0; - - buf += bix->sector_size; - } -} - -static void sd_dif_type3_generate_crc(struct blk_integrity_exchg *bix) -{ - sd_dif_type3_generate(bix, sd_dif_crc_fn); -} - -static void sd_dif_type3_generate_ip(struct blk_integrity_exchg *bix) -{ - sd_dif_type3_generate(bix, sd_dif_ip_fn); -} - -static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn) -{ - void *buf = bix->data_buf; - struct sd_dif_tuple *sdt = bix->prot_buf; - sector_t sector = bix->sector; - unsigned int i; - __u16 csum; - - for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - /* Unwritten sectors */ - if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff) - return 0; - - csum = fn(buf, bix->sector_size); - - if (sdt->guard_tag != csum) { - printk(KERN_ERR "%s: guard tag error on sector %lu " \ - "(rcvd %04x, data %04x)\n", bix->disk_name, - (unsigned long)sector, - be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum)); - return -EIO; - } - - buf += bix->sector_size; - sector++; - } - - return 0; -} - -static int sd_dif_type3_verify_crc(struct blk_integrity_exchg *bix) -{ - return sd_dif_type3_verify(bix, sd_dif_crc_fn); -} - -static int sd_dif_type3_verify_ip(struct blk_integrity_exchg *bix) -{ - return sd_dif_type3_verify(bix, sd_dif_ip_fn); -} - -static void sd_dif_type3_set_tag(void *prot, void *tag_buf, unsigned int sectors) -{ - struct sd_dif_tuple *sdt = prot; - u8 *tag = tag_buf; - unsigned int i, j; - - for (i = 0, j = 0 ; i < sectors ; i++, j += 6, sdt++) { - sdt->app_tag = tag[j] << 8 | tag[j+1]; - sdt->ref_tag = tag[j+2] << 24 | tag[j+3] << 16 | - tag[j+4] << 8 | tag[j+5]; - } -} - -static void sd_dif_type3_get_tag(void *prot, void *tag_buf, unsigned int sectors) -{ - struct sd_dif_tuple *sdt = prot; - u8 *tag = tag_buf; - unsigned int i, j; - - for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) { - tag[j] = (sdt->app_tag & 0xff00) >> 8; - tag[j+1] = sdt->app_tag & 0xff; - tag[j+2] = (sdt->ref_tag & 0xff000000) >> 24; - tag[j+3] = (sdt->ref_tag & 0xff0000) >> 16; - tag[j+4] = (sdt->ref_tag & 0xff00) >> 8; - tag[j+5] = sdt->ref_tag & 0xff; - BUG_ON(sdt->app_tag == 0xffff || sdt->ref_tag == 0xffffffff); - } -} - -static struct blk_integrity dif_type3_integrity_crc = { - .name = "T10-DIF-TYPE3-CRC", - .generate_fn = sd_dif_type3_generate_crc, - .verify_fn = sd_dif_type3_verify_crc, - .get_tag_fn = sd_dif_type3_get_tag, - .set_tag_fn = sd_dif_type3_set_tag, - .tuple_size = sizeof(struct sd_dif_tuple), - .tag_size = 0, -}; - -static struct blk_integrity dif_type3_integrity_ip = { - .name = "T10-DIF-TYPE3-IP", - .generate_fn = sd_dif_type3_generate_ip, - .verify_fn = sd_dif_type3_verify_ip, - .get_tag_fn = sd_dif_type3_get_tag, - .set_tag_fn = sd_dif_type3_set_tag, - .tuple_size = sizeof(struct sd_dif_tuple), - .tag_size = 0, -}; - /* * Configure exchange of protection information between OS and HBA. */ @@ -316,22 +56,30 @@ void sd_dif_config_host(struct scsi_disk *sdkp) return; /* Enable DMA of protection information */ - if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) + if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) { if (type == SD_DIF_TYPE3_PROTECTION) - blk_integrity_register(disk, &dif_type3_integrity_ip); + blk_integrity_register(disk, &t10_pi_type3_ip); else - blk_integrity_register(disk, &dif_type1_integrity_ip); - else + blk_integrity_register(disk, &t10_pi_type1_ip); + + disk->integrity->flags |= BLK_INTEGRITY_IP_CHECKSUM; + } else if (type == SD_DIF_TYPE3_PROTECTION) - blk_integrity_register(disk, &dif_type3_integrity_crc); + blk_integrity_register(disk, &t10_pi_type3_crc); else - blk_integrity_register(disk, &dif_type1_integrity_crc); + blk_integrity_register(disk, &t10_pi_type1_crc); sd_printk(KERN_NOTICE, sdkp, "Enabling DIX %s protection\n", disk->integrity->name); /* Signal to block layer that we support sector tagging */ - if (dif && type && sdkp->ATO) { + if (dif && type) { + + disk->integrity->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + + if (!sdkp) + return; + if (type == SD_DIF_TYPE3_PROTECTION) disk->integrity->tag_size = sizeof(u16) + sizeof(u32); else @@ -358,50 +106,49 @@ void sd_dif_config_host(struct scsi_disk *sdkp) * * Type 3 does not have a reference tag so no remapping is required. */ -void sd_dif_prepare(struct request *rq, sector_t hw_sector, - unsigned int sector_sz) +void sd_dif_prepare(struct scsi_cmnd *scmd) { - const int tuple_sz = sizeof(struct sd_dif_tuple); + const int tuple_sz = sizeof(struct t10_pi_tuple); struct bio *bio; struct scsi_disk *sdkp; - struct sd_dif_tuple *sdt; + struct t10_pi_tuple *pi; u32 phys, virt; - sdkp = rq->bio->bi_bdev->bd_disk->private_data; + sdkp = scsi_disk(scmd->request->rq_disk); if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION) return; - phys = hw_sector & 0xffffffff; + phys = scsi_prot_ref_tag(scmd); - __rq_for_each_bio(bio, rq) { + __rq_for_each_bio(bio, scmd->request) { + struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_vec iv; struct bvec_iter iter; unsigned int j; /* Already remapped? */ - if (bio_flagged(bio, BIO_MAPPED_INTEGRITY)) + if (bip->bip_flags & BIP_MAPPED_INTEGRITY) break; - virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; + virt = bip_get_seed(bip) & 0xffffffff; - bip_for_each_vec(iv, bio->bi_integrity, iter) { - sdt = kmap_atomic(iv.bv_page) - + iv.bv_offset; + bip_for_each_vec(iv, bip, iter) { + pi = kmap_atomic(iv.bv_page) + iv.bv_offset; - for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { + for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) { - if (be32_to_cpu(sdt->ref_tag) == virt) - sdt->ref_tag = cpu_to_be32(phys); + if (be32_to_cpu(pi->ref_tag) == virt) + pi->ref_tag = cpu_to_be32(phys); virt++; phys++; } - kunmap_atomic(sdt); + kunmap_atomic(pi); } - bio->bi_flags |= (1 << BIO_MAPPED_INTEGRITY); + bip->bip_flags |= BIP_MAPPED_INTEGRITY; } } @@ -411,11 +158,11 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector, */ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) { - const int tuple_sz = sizeof(struct sd_dif_tuple); + const int tuple_sz = sizeof(struct t10_pi_tuple); struct scsi_disk *sdkp; struct bio *bio; - struct sd_dif_tuple *sdt; - unsigned int j, sectors, sector_sz; + struct t10_pi_tuple *pi; + unsigned int j, intervals; u32 phys, virt; sdkp = scsi_disk(scmd->request->rq_disk); @@ -423,39 +170,35 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION || good_bytes == 0) return; - sector_sz = scmd->device->sector_size; - sectors = good_bytes / sector_sz; - - phys = blk_rq_pos(scmd->request) & 0xffffffff; - if (sector_sz == 4096) - phys >>= 3; + intervals = good_bytes / scsi_prot_interval(scmd); + phys = scsi_prot_ref_tag(scmd); __rq_for_each_bio(bio, scmd->request) { + struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_vec iv; struct bvec_iter iter; - virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; + virt = bip_get_seed(bip) & 0xffffffff; - bip_for_each_vec(iv, bio->bi_integrity, iter) { - sdt = kmap_atomic(iv.bv_page) - + iv.bv_offset; + bip_for_each_vec(iv, bip, iter) { + pi = kmap_atomic(iv.bv_page) + iv.bv_offset; - for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { + for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) { - if (sectors == 0) { - kunmap_atomic(sdt); + if (intervals == 0) { + kunmap_atomic(pi); return; } - if (be32_to_cpu(sdt->ref_tag) == phys) - sdt->ref_tag = cpu_to_be32(virt); + if (be32_to_cpu(pi->ref_tag) == phys) + pi->ref_tag = cpu_to_be32(virt); virt++; phys++; - sectors--; + intervals--; } - kunmap_atomic(sdt); + kunmap_atomic(pi); } } } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 01cf8888879..60354449d9e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1711,9 +1711,9 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } rq = blk_get_request(q, rw, GFP_ATOMIC); - if (!rq) { + if (IS_ERR(rq)) { kfree(long_cmdp); - return -ENOMEM; + return PTR_ERR(rq); } blk_rq_set_block_pc(rq); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index d3fd6e8fb37..4daa372ed38 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -490,7 +490,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); - if (!req) + if (IS_ERR(req)) return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index eee1bc0b506..b83846fc785 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* CPU hotplug notifier */ struct notifier_block nb; + /* Protected by event_vq lock */ + bool stop_events; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -303,6 +306,11 @@ static void virtscsi_cancel_event_work(struct virtio_scsi *vscsi) { int i; + /* Stop scheduling work before calling cancel_work_sync. */ + spin_lock_irq(&vscsi->event_vq.vq_lock); + vscsi->stop_events = true; + spin_unlock_irq(&vscsi->event_vq.vq_lock); + for (i = 0; i < VIRTIO_SCSI_EVENT_LEN; i++) cancel_work_sync(&vscsi->event_list[i].work); } @@ -390,7 +398,8 @@ static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; - schedule_work(&event_node->work); + if (!vscsi->stop_events) + queue_work(system_freezable_wq, &event_node->work); } static void virtscsi_event_done(struct virtqueue *vq) @@ -851,13 +860,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq->vq = vq; } -static void virtscsi_scan(struct virtio_device *vdev) -{ - struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv; - - scsi_scan_host(shost); -} - static void virtscsi_remove_vqs(struct virtio_device *vdev) { struct Scsi_Host *sh = virtio_scsi_host(vdev); @@ -916,9 +918,6 @@ static int virtscsi_init(struct virtio_device *vdev, virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE); virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE); - if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) - virtscsi_kick_event_all(vscsi); - err = 0; out: @@ -997,10 +996,13 @@ static int virtscsi_probe(struct virtio_device *vdev) err = scsi_add_host(shost, &vdev->dev); if (err) goto scsi_add_host_failed; - /* - * scsi_scan_host() happens in virtscsi_scan() via virtio_driver->scan() - * after VIRTIO_CONFIG_S_DRIVER_OK has been set.. - */ + + virtio_device_ready(vdev); + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) + virtscsi_kick_event_all(vscsi); + + scsi_scan_host(shost); return 0; scsi_add_host_failed: @@ -1048,8 +1050,15 @@ static int virtscsi_restore(struct virtio_device *vdev) return err; err = register_hotcpu_notifier(&vscsi->nb); - if (err) + if (err) { vdev->config->del_vqs(vdev); + return err; + } + + virtio_device_ready(vdev); + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) + virtscsi_kick_event_all(vscsi); return err; } @@ -1073,7 +1082,6 @@ static struct virtio_driver virtio_scsi_driver = { .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtscsi_probe, - .scan = virtscsi_scan, #ifdef CONFIG_PM_SLEEP .freeze = virtscsi_freeze, .restore = virtscsi_restore, diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 943b1dbe859..70d9f6dabba 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1050,7 +1050,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, (data_direction == DMA_TO_DEVICE), GFP_KERNEL); - if (!req) { + if (IS_ERR(req)) { pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto fail; diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index fed0ce198ae..df598dd8c5c 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -117,6 +117,43 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, } EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); +static void __virtio_config_changed(struct virtio_device *dev) +{ + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + + if (!dev->config_enabled) + dev->config_change_pending = true; + else if (drv && drv->config_changed) + drv->config_changed(dev); +} + +void virtio_config_changed(struct virtio_device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->config_lock, flags); + __virtio_config_changed(dev); + spin_unlock_irqrestore(&dev->config_lock, flags); +} +EXPORT_SYMBOL_GPL(virtio_config_changed); + +static void virtio_config_disable(struct virtio_device *dev) +{ + spin_lock_irq(&dev->config_lock); + dev->config_enabled = false; + spin_unlock_irq(&dev->config_lock); +} + +static void virtio_config_enable(struct virtio_device *dev) +{ + spin_lock_irq(&dev->config_lock); + dev->config_enabled = true; + if (dev->config_change_pending) + __virtio_config_changed(dev); + dev->config_change_pending = false; + spin_unlock_irq(&dev->config_lock); +} + static int virtio_dev_probe(struct device *_d) { int err, i; @@ -153,6 +190,8 @@ static int virtio_dev_probe(struct device *_d) add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); if (drv->scan) drv->scan(dev); + + virtio_config_enable(dev); } return err; @@ -163,6 +202,8 @@ static int virtio_dev_remove(struct device *_d) struct virtio_device *dev = dev_to_virtio(_d); struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + virtio_config_disable(dev); + drv->remove(dev); /* Driver should have reset device. */ @@ -211,6 +252,10 @@ int register_virtio_device(struct virtio_device *dev) dev->index = err; dev_set_name(&dev->dev, "virtio%u", dev->index); + spin_lock_init(&dev->config_lock); + dev->config_enabled = false; + dev->config_change_pending = false; + /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ dev->config->reset(dev); @@ -239,6 +284,64 @@ void unregister_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(unregister_virtio_device); +#ifdef CONFIG_PM_SLEEP +int virtio_device_freeze(struct virtio_device *dev) +{ + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + + virtio_config_disable(dev); + + dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; + + if (drv && drv->freeze) + return drv->freeze(dev); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_device_freeze); + +int virtio_device_restore(struct virtio_device *dev) +{ + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + + /* We always start by resetting the device, in case a previous + * driver messed it up. */ + dev->config->reset(dev); + + /* Acknowledge that we've seen the device. */ + add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + + /* Maybe driver failed before freeze. + * Restore the failed status, for debugging. */ + if (dev->failed) + add_status(dev, VIRTIO_CONFIG_S_FAILED); + + if (!drv) + return 0; + + /* We have a driver! */ + add_status(dev, VIRTIO_CONFIG_S_DRIVER); + + dev->config->finalize_features(dev); + + if (drv->restore) { + int ret = drv->restore(dev); + if (ret) { + add_status(dev, VIRTIO_CONFIG_S_FAILED); + return ret; + } + } + + /* Finally, tell the device we're all set */ + add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + + virtio_config_enable(dev); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_device_restore); +#endif + static int virtio_init(void) { if (bus_register(&virtio_bus) != 0) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f893148a107..c9703d4d6f6 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -504,6 +504,8 @@ static int virtballoon_restore(struct virtio_device *vdev) if (ret) return ret; + virtio_device_ready(vdev); + fill_balloon(vb, towards_target(vb)); update_balloon_size(vb); return 0; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c600ccfd692..ef9a1650bb8 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -234,8 +234,6 @@ static irqreturn_t vm_interrupt(int irq, void *opaque) { struct virtio_mmio_device *vm_dev = opaque; struct virtio_mmio_vq_info *info; - struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver, - struct virtio_driver, driver); unsigned long status; unsigned long flags; irqreturn_t ret = IRQ_NONE; @@ -244,9 +242,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque) status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS); writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK); - if (unlikely(status & VIRTIO_MMIO_INT_CONFIG) - && vdrv && vdrv->config_changed) { - vdrv->config_changed(&vm_dev->vdev); + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)) { + virtio_config_changed(&vm_dev->vdev); ret = IRQ_HANDLED; } diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 3d1463c6b12..d34ebfa604f 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -57,9 +57,6 @@ struct virtio_pci_device /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; - /* Status saved during hibernate/restore */ - u8 saved_status; - /* Whether we have vector per vq */ bool per_vq_vectors; }; @@ -211,12 +208,8 @@ static bool vp_notify(struct virtqueue *vq) static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_driver *drv; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); - if (drv && drv->config_changed) - drv->config_changed(&vp_dev->vdev); + virtio_config_changed(&vp_dev->vdev); return IRQ_HANDLED; } @@ -768,16 +761,9 @@ static int virtio_pci_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - struct virtio_driver *drv; int ret; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); - - ret = 0; - vp_dev->saved_status = vp_get_status(&vp_dev->vdev); - if (drv && drv->freeze) - ret = drv->freeze(&vp_dev->vdev); + ret = virtio_device_freeze(&vp_dev->vdev); if (!ret) pci_disable_device(pci_dev); @@ -788,27 +774,14 @@ static int virtio_pci_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - struct virtio_driver *drv; int ret; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); - ret = pci_enable_device(pci_dev); if (ret) return ret; pci_set_master(pci_dev); - vp_finalize_features(&vp_dev->vdev); - - if (drv && drv->restore) - ret = drv->restore(&vp_dev->vdev); - - /* Finally, tell the device we're all set */ - if (!ret) - vp_set_status(&vp_dev->vdev, vp_dev->saved_status); - - return ret; + return virtio_device_restore(&vp_dev->vdev); } static const struct dev_pm_ops virtio_pci_pm_ops = { diff --git a/fs/block_dev.c b/fs/block_dev.c index e2f3ad0879c..cc9d4114cda 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode) EXPORT_SYMBOL(I_BDEV); /* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. + * Move the inode from its current bdi to a new bdi. Make sure the inode + * is clean before moving so that it doesn't linger on the old bdi. */ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; - - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); - spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (true) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_DIRTY)) { + inode->i_data.backing_dev_info = dst; + spin_unlock(&inode->i_lock); + return; + } + spin_unlock(&inode->i_lock); + WARN_ON_ONCE(write_inode_now(inode, true)); } - spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ @@ -1179,8 +1169,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fa45e3cae40..1ad0f47ac85 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1702,7 +1702,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi && bdi_congested(bdi, bdi_bits)) { + if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc9c0439caa..d23362f4464 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5261,42 +5261,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) iput(inode); inode = ERR_PTR(ret); } - /* - * If orphan cleanup did remove any orphans, it means the tree - * was modified and therefore the commit root is not the same as - * the current root anymore. This is a problem, because send - * uses the commit root and therefore can see inode items that - * don't exist in the current root anymore, and for example make - * calls to btrfs_iget, which will do tree lookups based on the - * current root and not on the commit root. Those lookups will - * fail, returning a -ESTALE error, and making send fail with - * that error. So make sure a send does not see any orphans we - * have just removed, and that it will see the same inodes - * regardless of whether a transaction commit happened before - * it started (meaning that the commit root will be the same as - * the current root) or not. - */ - if (sub_root->node != sub_root->commit_root) { - u64 sub_flags = btrfs_root_flags(&sub_root->root_item); - - if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { - struct extent_buffer *eb; - - /* - * Assert we can't have races between dentry - * lookup called through the snapshot creation - * ioctl and the VFS. - */ - ASSERT(mutex_is_locked(&dir->i_mutex)); - - down_write(&root->fs_info->commit_root_sem); - eb = sub_root->commit_root; - sub_root->commit_root = - btrfs_root_node(sub_root); - up_write(&root->fs_info->commit_root_sem); - free_extent_buffer(eb); - } - } } return inode; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0fe1aa047f1..8d2b76e29d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -713,6 +713,39 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto fail; + ret = btrfs_orphan_cleanup(pending_snapshot->snap); + if (ret) + goto fail; + + /* + * If orphan cleanup did remove any orphans, it means the tree was + * modified and therefore the commit root is not the same as the + * current root anymore. This is a problem, because send uses the + * commit root and therefore can see inode items that don't exist + * in the current root anymore, and for example make calls to + * btrfs_iget, which will do tree lookups based on the current root + * and not on the commit root. Those lookups will fail, returning a + * -ESTALE error, and making send fail with that error. So make sure + * a send does not see any orphans we have just removed, and that it + * will see the same inodes regardless of whether a transaction + * commit happened before it started (meaning that the commit root + * will be the same as the current root) or not. + */ + if (readonly && pending_snapshot->snap->node != + pending_snapshot->snap->commit_root) { + trans = btrfs_join_transaction(pending_snapshot->snap); + if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { + ret = PTR_ERR(trans); + goto fail; + } + if (!IS_ERR(trans)) { + ret = btrfs_commit_transaction(trans, + pending_snapshot->snap); + if (ret) + goto fail; + } + } + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 9409fa10bd5..3182273a340 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -45,6 +45,7 @@ #define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */ #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */ +#define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */ struct cifs_sb_info { struct rb_root tlink_tree; diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 15e9505aa35..0303c6793d9 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -20,6 +20,7 @@ */ #include <linux/fs.h> #include <linux/slab.h> +#include "cifs_fs_sb.h" #include "cifs_unicode.h" #include "cifs_uniupr.h" #include "cifspdu.h" @@ -61,26 +62,24 @@ cifs_utf16_bytes(const __le16 *from, int maxbytes, return outlen; } -/* - * cifs_mapchar - convert a host-endian char to proper char in codepage - * @target - where converted character should be copied - * @src_char - 2 byte host-endian source character - * @cp - codepage to which character should be converted - * @mapchar - should character be mapped according to mapchars mount option? - * - * This function handles the conversion of a single character. It is the - * responsibility of the caller to ensure that the target buffer is large - * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). - */ -static int -cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, - bool mapchar) +int cifs_remap(struct cifs_sb_info *cifs_sb) { - int len = 1; + int map_type; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + map_type = SFM_MAP_UNI_RSVD; + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + map_type = SFU_MAP_UNI_RSVD; + else + map_type = NO_MAP_UNI_RSVD; - if (!mapchar) - goto cp_convert; + return map_type; +} +/* Convert character using the SFU - "Services for Unix" remapping range */ +static bool +convert_sfu_char(const __u16 src_char, char *target) +{ /* * BB: Cannot handle remapping UNI_SLASH until all the calls to * build_path_from_dentry are modified, as they use slash as @@ -106,19 +105,74 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, *target = '<'; break; default: - goto cp_convert; + return false; } + return true; +} + +/* Convert character using the SFM - "Services for Mac" remapping range */ +static bool +convert_sfm_char(const __u16 src_char, char *target) +{ + switch (src_char) { + case SFM_COLON: + *target = ':'; + break; + case SFM_ASTERISK: + *target = '*'; + break; + case SFM_QUESTION: + *target = '?'; + break; + case SFM_PIPE: + *target = '|'; + break; + case SFM_GRTRTHAN: + *target = '>'; + break; + case SFM_LESSTHAN: + *target = '<'; + break; + case SFM_SLASH: + *target = '\\'; + break; + default: + return false; + } + return true; +} -out: - return len; -cp_convert: +/* + * cifs_mapchar - convert a host-endian char to proper char in codepage + * @target - where converted character should be copied + * @src_char - 2 byte host-endian source character + * @cp - codepage to which character should be converted + * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? + * + * This function handles the conversion of a single character. It is the + * responsibility of the caller to ensure that the target buffer is large + * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). + */ +static int +cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, + int maptype) +{ + int len = 1; + + if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) + return len; + else if ((maptype == SFU_MAP_UNI_RSVD) && + convert_sfu_char(src_char, target)) + return len; + + /* if character not one of seven in special remap set */ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); if (len <= 0) { *target = '?'; len = 1; } - goto out; + return len; } /* @@ -145,7 +199,7 @@ cp_convert: */ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, - const struct nls_table *codepage, bool mapchar) + const struct nls_table *codepage, int map_type) { int i, charlen, safelen; int outlen = 0; @@ -172,13 +226,13 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, * conversion bleed into the null terminator */ if (outlen >= safelen) { - charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar); + charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); if ((outlen + charlen) > (tolen - nullsize)) break; } /* put converted char into 'to' buffer */ - charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); + charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); outlen += charlen; } @@ -267,7 +321,7 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, if (!dst) return NULL; cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, - false); + NO_MAP_UNI_RSVD); } else { len = strnlen(src, maxlen); len++; @@ -280,6 +334,66 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, return dst; } +static __le16 convert_to_sfu_char(char src_char) +{ + __le16 dest_char; + + switch (src_char) { + case ':': + dest_char = cpu_to_le16(UNI_COLON); + break; + case '*': + dest_char = cpu_to_le16(UNI_ASTERISK); + break; + case '?': + dest_char = cpu_to_le16(UNI_QUESTION); + break; + case '<': + dest_char = cpu_to_le16(UNI_LESSTHAN); + break; + case '>': + dest_char = cpu_to_le16(UNI_GRTRTHAN); + break; + case '|': + dest_char = cpu_to_le16(UNI_PIPE); + break; + default: + dest_char = 0; + } + + return dest_char; +} + +static __le16 convert_to_sfm_char(char src_char) +{ + __le16 dest_char; + + switch (src_char) { + case ':': + dest_char = cpu_to_le16(SFM_COLON); + break; + case '*': + dest_char = cpu_to_le16(SFM_ASTERISK); + break; + case '?': + dest_char = cpu_to_le16(SFM_QUESTION); + break; + case '<': + dest_char = cpu_to_le16(SFM_LESSTHAN); + break; + case '>': + dest_char = cpu_to_le16(SFM_GRTRTHAN); + break; + case '|': + dest_char = cpu_to_le16(SFM_PIPE); + break; + default: + dest_char = 0; + } + + return dest_char; +} + /* * Convert 16 bit Unicode pathname to wire format from string in current code * page. Conversion may involve remapping up the six characters that are @@ -288,7 +402,7 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, */ int cifsConvertToUTF16(__le16 *target, const char *source, int srclen, - const struct nls_table *cp, int mapChars) + const struct nls_table *cp, int map_chars) { int i, charlen; int j = 0; @@ -296,39 +410,30 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, __le16 dst_char; wchar_t tmp; - if (!mapChars) + if (map_chars == NO_MAP_UNI_RSVD) return cifs_strtoUTF16(target, source, PATH_MAX, cp); for (i = 0; i < srclen; j++) { src_char = source[i]; charlen = 1; - switch (src_char) { - case 0: + + /* check if end of string */ + if (src_char == 0) goto ctoUTF16_out; - case ':': - dst_char = cpu_to_le16(UNI_COLON); - break; - case '*': - dst_char = cpu_to_le16(UNI_ASTERISK); - break; - case '?': - dst_char = cpu_to_le16(UNI_QUESTION); - break; - case '<': - dst_char = cpu_to_le16(UNI_LESSTHAN); - break; - case '>': - dst_char = cpu_to_le16(UNI_GRTRTHAN); - break; - case '|': - dst_char = cpu_to_le16(UNI_PIPE); - break; + + /* see if we must remap this char */ + if (map_chars == SFU_MAP_UNI_RSVD) + dst_char = convert_to_sfu_char(src_char); + else if (map_chars == SFM_MAP_UNI_RSVD) + dst_char = convert_to_sfm_char(src_char); + else + dst_char = 0; /* * FIXME: We can not handle remapping backslash (UNI_SLASH) * until all the calls to build_path_from_dentry are modified, * as they use backslash as separator. */ - default: + if (dst_char == 0) { charlen = cp->char2uni(source + i, srclen - i, &tmp); dst_char = cpu_to_le16(tmp); diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index d8eac3b6cef..bdc52cb9a67 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -52,6 +52,34 @@ #define UNI_PIPE (__u16) ('|' + 0xF000) #define UNI_SLASH (__u16) ('\\' + 0xF000) +/* + * Macs use an older "SFM" mapping of the symbols above. Fortunately it does + * not conflict (although almost does) with the mapping above. + */ + +#define SFM_ASTERISK ((__u16) 0xF021) +#define SFM_QUESTION ((__u16) 0xF025) +#define SFM_COLON ((__u16) 0xF022) +#define SFM_GRTRTHAN ((__u16) 0xF024) +#define SFM_LESSTHAN ((__u16) 0xF023) +#define SFM_PIPE ((__u16) 0xF027) +#define SFM_SLASH ((__u16) 0xF026) + +/* + * Mapping mechanism to use when one of the seven reserved characters is + * encountered. We can only map using one of the mechanisms at a time + * since otherwise readdir could return directory entries which we would + * not be able to open + * + * NO_MAP_UNI_RSVD = do not perform any remapping of the character + * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible) + * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option) + * + */ +#define NO_MAP_UNI_RSVD 0 +#define SFM_MAP_UNI_RSVD 1 +#define SFU_MAP_UNI_RSVD 2 + /* Just define what we want from uniupr.h. We don't want to define the tables * in each source file. */ @@ -75,7 +103,7 @@ extern const struct UniCaseRange CifsUniLowerRange[]; #ifdef __KERNEL__ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, - const struct nls_table *codepage, bool mapchar); + const struct nls_table *cp, int map_type); int cifs_utf16_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage); int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *); @@ -84,6 +112,7 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen, const struct nls_table *codepage); extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, const struct nls_table *cp, int mapChars); +extern int cifs_remap(struct cifs_sb_info *cifs_sb); #ifdef CONFIG_CIFS_SMB2 extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, const struct nls_table *cp, diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4934347321d..4ac7445e6ec 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -431,7 +431,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) return -ENOMEM; cifs_from_utf16(ses->domainName, (__le16 *)blobptr, attrsize, attrsize, - nls_cp, false); + nls_cp, NO_MAP_UNI_RSVD); break; } } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 25b8392bfdd..02a33e52990 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -323,11 +323,11 @@ struct smb_version_operations { int (*async_writev)(struct cifs_writedata *, void (*release)(struct kref *)); /* sync read from the server */ - int (*sync_read)(const unsigned int, struct cifsFileInfo *, + int (*sync_read)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, char **, int *); /* sync write to the server */ - int (*sync_write)(const unsigned int, struct cifsFileInfo *, + int (*sync_write)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, struct kvec *, unsigned long); /* open dir, start readdir */ @@ -466,6 +466,7 @@ struct smb_vol { bool direct_io:1; bool strict_io:1; /* strict cache behavior */ bool remap:1; /* set to remap seven reserved chars in filenames */ + bool sfu_remap:1; /* remap seven reserved chars ala SFU */ bool posix_paths:1; /* unset to not ask for posix pathnames. */ bool no_linux_ext:1; bool sfu_emul:1; @@ -499,6 +500,7 @@ struct smb_vol { #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \ CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \ CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \ + CIFS_MOUNT_MAP_SFM_CHR | \ CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \ CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \ CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 66f65001a6d..61d00a6e398 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -867,7 +867,7 @@ CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, int rc = 0; int bytes_returned; int name_len; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); DelFileRetry: rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, @@ -913,7 +913,7 @@ CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, int rc = 0; int bytes_returned; int name_len; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); cifs_dbg(FYI, "In CIFSSMBRmDir\n"); RmDirRetry: @@ -958,7 +958,7 @@ CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, CREATE_DIRECTORY_RSP *pSMBr = NULL; int bytes_returned; int name_len; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); cifs_dbg(FYI, "In CIFSSMBMkDir\n"); MkDirRetry: @@ -1280,7 +1280,7 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock, __u16 count; struct cifs_sb_info *cifs_sb = oparms->cifs_sb; struct cifs_tcon *tcon = oparms->tcon; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); const struct nls_table *nls = cifs_sb->local_nls; int create_options = oparms->create_options; int desired_access = oparms->desired_access; @@ -2572,7 +2572,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); cifs_dbg(FYI, "In CIFSSMBRename\n"); renameRetry: @@ -2968,7 +2968,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); cifs_dbg(FYI, "In CIFSCreateHardLink\n"); winCreateHardLinkRetry: @@ -4367,7 +4367,7 @@ findFirstRetry: return rc; nls_codepage = cifs_sb->local_nls; - remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + remap = cifs_remap(cifs_sb); if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -5527,7 +5527,7 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, int name_len; int rc = 0; int bytes_returned = 0; - int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + int remap = cifs_remap(cifs_sb); __u16 params, byte_count, data_count, param_offset, offset; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 239e1fb3300..24fa08d261f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -70,6 +70,7 @@ enum { Opt_forcegid, Opt_noforcegid, Opt_noblocksend, Opt_noautotune, Opt_hard, Opt_soft, Opt_perm, Opt_noperm, + Opt_mapposix, Opt_nomapposix, Opt_mapchars, Opt_nomapchars, Opt_sfu, Opt_nosfu, Opt_nodfs, Opt_posixpaths, Opt_noposixpaths, Opt_nounix, @@ -124,8 +125,10 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_soft, "soft" }, { Opt_perm, "perm" }, { Opt_noperm, "noperm" }, - { Opt_mapchars, "mapchars" }, + { Opt_mapchars, "mapchars" }, /* SFU style */ { Opt_nomapchars, "nomapchars" }, + { Opt_mapposix, "mapposix" }, /* SFM style */ + { Opt_nomapposix, "nomapposix" }, { Opt_sfu, "sfu" }, { Opt_nosfu, "nosfu" }, { Opt_nodfs, "nodfs" }, @@ -1231,6 +1234,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->linux_uid = current_uid(); vol->linux_gid = current_gid(); + /* + * default to SFM style remapping of seven reserved characters + * unless user overrides it or we negotiate CIFS POSIX where + * it is unnecessary. Can not simultaneously use more than one mapping + * since then readdir could list files that open could not open + */ + vol->remap = true; + /* default to only allowing write access to owner of the mount */ vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; @@ -1338,10 +1349,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->noperm = 1; break; case Opt_mapchars: - vol->remap = 1; + vol->sfu_remap = true; + vol->remap = false; /* disable SFM mapping */ break; case Opt_nomapchars: - vol->remap = 0; + vol->sfu_remap = false; + break; + case Opt_mapposix: + vol->remap = true; + vol->sfu_remap = false; /* disable SFU mapping */ + break; + case Opt_nomapposix: + vol->remap = false; break; case Opt_sfu: vol->sfu_emul = 1; @@ -3197,6 +3216,8 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, if (pvolume_info->server_ino) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; if (pvolume_info->remap) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR; + if (pvolume_info->sfu_remap) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; if (pvolume_info->no_xattr) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; @@ -3239,10 +3260,20 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, } if (pvolume_info->mfsymlinks) { if (pvolume_info->sfu_emul) { - cifs_dbg(VFS, "mount option mfsymlinks ignored if sfu mount option is used\n"); - } else { - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; + /* + * Our SFU ("Services for Unix" emulation does not allow + * creating symlinks but does allow reading existing SFU + * symlinks (it does allow both creating and reading SFU + * style mknod and FIFOs though). When "mfsymlinks" and + * "sfu" are both enabled at the same time, it allows + * reading both types of symlinks, but will only create + * them with mfsymlinks format. This allows better + * Apple compatibility (probably better for Samba too) + * while still recognizing old Windows style symlinks. + */ + cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); } + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; } if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) @@ -3330,8 +3361,7 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1; rc = get_dfs_path(xid, ses, ref_path, cifs_sb->local_nls, - &num_referrals, &referrals, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + &num_referrals, &referrals, cifs_remap(cifs_sb)); if (!rc && num_referrals > 0) { char *fake_devname = NULL; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 073640675a3..b72bc29cba2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -577,12 +577,13 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, struct cifs_io_parms io_parms; char *full_path = NULL; struct inode *newinode = NULL; - int oplock = 0; + __u32 oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; FILE_ALL_INFO *buf = NULL; unsigned int bytes_written; struct win_dev *pdev; + struct kvec iov[2]; if (!old_valid_dev(device_number)) return -EINVAL; @@ -658,7 +659,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, oparms.fid = &fid; oparms.reconnect = false; - rc = CIFS_open(xid, &oparms, &oplock, buf); + if (tcon->ses->server->oplocks) + oplock = REQ_OPLOCK; + else + oplock = 0; + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); if (rc) goto mknod_out; @@ -668,25 +673,26 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, */ pdev = (struct win_dev *)buf; - io_parms.netfid = fid.netfid; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); + iov[1].iov_base = buf; + iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); pdev->major = cpu_to_le64(MAJOR(device_number)); pdev->minor = cpu_to_le64(MINOR(device_number)); - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, (char *)pdev, - NULL, 0); + rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, + &bytes_written, iov, 1); } else if (S_ISBLK(mode)) { memcpy(pdev->type, "IntxBLK", 8); pdev->major = cpu_to_le64(MAJOR(device_number)); pdev->minor = cpu_to_le64(MINOR(device_number)); - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, (char *)pdev, - NULL, 0); + rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, + &bytes_written, iov, 1); } /* else if (S_ISFIFO) */ - CIFSSMBClose(xid, tcon, fid.netfid); + tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(direntry); /* FIXME: add code here to set EAs */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8f7b40fd8f3..3e4d00a06c4 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1687,8 +1687,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, io_parms.tcon = tcon; io_parms.offset = *offset; io_parms.length = len; - rc = server->ops->sync_write(xid, open_file, &io_parms, - &bytes_written, iov, 1); + rc = server->ops->sync_write(xid, &open_file->fid, + &io_parms, &bytes_written, iov, 1); } if (rc || (bytes_written == 0)) { if (total_written) @@ -3206,7 +3206,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) io_parms.tcon = tcon; io_parms.offset = *offset; io_parms.length = current_read_size; - rc = server->ops->sync_read(xid, open_file, &io_parms, + rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, &bytes_read, &cur_offset, &buf_type); } while (rc == -EAGAIN); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8fd4ee8e07f..197cb503d52 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -30,6 +30,7 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "cifs_unicode.h" #include "fscache.h" @@ -412,7 +413,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, struct cifs_sb_info *cifs_sb, unsigned int xid) { int rc; - int oplock = 0; + __u32 oplock; struct tcon_link *tlink; struct cifs_tcon *tcon; struct cifs_fid fid; @@ -451,8 +452,13 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.fid = &fid; oparms.reconnect = false; - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (tcon->ses->server->oplocks) + oplock = REQ_OPLOCK; + else + oplock = 0; + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, NULL); if (rc) { + cifs_dbg(FYI, "check sfu type of %s, open rc = %d\n", path, rc); cifs_put_tlink(tlink); return rc; } @@ -464,7 +470,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, io_parms.offset = 0; io_parms.length = 24; - rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type); + rc = tcon->ses->server->ops->sync_read(xid, &fid, &io_parms, + &bytes_read, &pbuf, &buf_type); if ((rc == 0) && (bytes_read >= 8)) { if (memcmp("IntxBLK", pbuf, 8) == 0) { cifs_dbg(FYI, "Block device\n"); @@ -504,7 +511,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, fattr->cf_dtype = DT_REG; rc = -EOPNOTSUPP; /* or some unknown SFU type */ } - CIFSSMBClose(xid, tcon, fid.netfid); + + tcon->ses->server->ops->close(xid, tcon, &fid); cifs_put_tlink(tlink); return rc; } @@ -539,7 +547,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path, "SETFILEBITS", ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (rc < 0) return (int)rc; @@ -952,11 +960,18 @@ struct inode *cifs_root_iget(struct super_block *sb) struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); xid = get_xid(); - if (tcon->unix_ext) + if (tcon->unix_ext) { rc = cifs_get_inode_info_unix(&inode, "", sb, xid); - else - rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL); + /* some servers mistakenly claim POSIX support */ + if (rc != -EOPNOTSUPP) + goto iget_no_retry; + cifs_dbg(VFS, "server does not support POSIX extensions"); + tcon->unix_ext = false; + } + + rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL); +iget_no_retry: if (!inode) { inode = ERR_PTR(rc); goto out; @@ -1117,8 +1132,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, /* rename the file */ rc = CIFSSMBRenameOpenFile(xid, tcon, fid.netfid, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc != 0) { rc = -EBUSY; goto undo_setattr; @@ -1159,8 +1173,7 @@ out: */ undo_rename: CIFSSMBRenameOpenFile(xid, tcon, fid.netfid, dentry->d_name.name, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); undo_setattr: if (dosattr != origattr) { info_buf->Attributes = cpu_to_le32(origattr); @@ -1226,7 +1239,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_dbg(FYI, "posix del rc %d\n", rc); if ((rc == 0) || (rc == -ENOENT)) goto psx_del_no_retry; @@ -1349,8 +1362,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, } CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); } else { struct TCP_Server_Info *server = tcon->ses->server; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && @@ -1392,8 +1404,7 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, NULL /* netfid */, info, &oplock, full_path, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc == -EOPNOTSUPP) goto posix_mkdir_out; else if (rc) { @@ -1617,8 +1628,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, if (rc == 0) { rc = CIFSSMBRenameOpenFile(xid, tcon, fid.netfid, (const char *) to_dentry->d_name.name, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); CIFSSMBClose(xid, tcon, fid.netfid); } do_rename_exit: @@ -1694,16 +1704,14 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name, info_buf_source, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (tmprc != 0) goto unlink_target; tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name, info_buf_target, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (tmprc == 0 && (info_buf_source->UniqueId == info_buf_target->UniqueId)) { @@ -2068,8 +2076,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, GENERIC_WRITE, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc == 0) { unsigned int bytes_written; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 5657416d348..2ec6037f61c 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -28,6 +28,10 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "cifs_unicode.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2proto.h" +#endif /* * M-F Symlink Functions - Begin @@ -401,6 +405,134 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, } /* + * SMB 2.1/SMB3 Protocol specific functions + */ +#ifdef CONFIG_CIFS_SMB2 +int +smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const unsigned char *path, + char *pbuf, unsigned int *pbytes_read) +{ + int rc; + struct cifs_fid fid; + struct cifs_open_parms oparms; + struct cifs_io_parms io_parms; + int buf_type = CIFS_NO_BUFFER; + __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_II; + struct smb2_file_all_info *pfile_info = NULL; + + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = GENERIC_READ; + oparms.create_options = CREATE_NOT_DIR; + if (backup_cred(cifs_sb)) + oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.disposition = FILE_OPEN; + oparms.fid = &fid; + oparms.reconnect = false; + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (utf16_path == NULL) + return -ENOMEM; + + pfile_info = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, + GFP_KERNEL); + + if (pfile_info == NULL) { + kfree(utf16_path); + return -ENOMEM; + } + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, pfile_info, NULL); + if (rc) + goto qmf_out_open_fail; + + if (pfile_info->EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { + /* it's not a symlink */ + rc = -ENOENT; /* Is there a better rc to return? */ + goto qmf_out; + } + + io_parms.netfid = fid.netfid; + io_parms.pid = current->tgid; + io_parms.tcon = tcon; + io_parms.offset = 0; + io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; + io_parms.persistent_fid = fid.persistent_fid; + io_parms.volatile_fid = fid.volatile_fid; + rc = SMB2_read(xid, &io_parms, pbytes_read, &pbuf, &buf_type); +qmf_out: + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); +qmf_out_open_fail: + kfree(utf16_path); + kfree(pfile_info); + return rc; +} + +int +smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const unsigned char *path, + char *pbuf, unsigned int *pbytes_written) +{ + int rc; + struct cifs_fid fid; + struct cifs_open_parms oparms; + struct cifs_io_parms io_parms; + int create_options = CREATE_NOT_DIR; + __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + struct kvec iov[2]; + + if (backup_cred(cifs_sb)) + create_options |= CREATE_OPEN_BACKUP_INTENT; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, path); + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = GENERIC_WRITE; + oparms.create_options = create_options; + oparms.disposition = FILE_CREATE; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); + if (rc) { + kfree(utf16_path); + return rc; + } + + io_parms.netfid = fid.netfid; + io_parms.pid = current->tgid; + io_parms.tcon = tcon; + io_parms.offset = 0; + io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; + io_parms.persistent_fid = fid.persistent_fid; + io_parms.volatile_fid = fid.volatile_fid; + + /* iov[0] is reserved for smb header */ + iov[1].iov_base = pbuf; + iov[1].iov_len = CIFS_MF_SYMLINK_FILE_SIZE; + + rc = SMB2_write(xid, &io_parms, pbytes_written, iov, 1); + + /* Make sure we wrote all of the symlink data */ + if ((rc == 0) && (*pbytes_written != CIFS_MF_SYMLINK_FILE_SIZE)) + rc = -EIO; + + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + + kfree(utf16_path); + return rc; +} +#endif /* CONFIG_CIFS_SMB2 */ + +/* * M-F Symlink Functions - End */ @@ -435,8 +567,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, if (tcon->unix_ext) rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); else { server = tcon->ses->server; if (!server->ops->create_hardlink) { @@ -461,11 +592,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, spin_lock(&old_file->d_inode->i_lock); inc_nlink(old_file->d_inode); spin_unlock(&old_file->d_inode->i_lock); - /* - * BB should we make this contingent on superblock flag - * NOATIME? - */ - /* old_file->d_inode->i_ctime = CURRENT_TIME; */ + /* * parent dir timestamps will update from srv within a * second, would it really be worth it to set the parent @@ -475,7 +602,9 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, } /* * if not oplocked will force revalidate to get info on source - * file from srv + * file from srv. Note Samba server prior to 4.2 has bug - + * not updating src file ctime on hardlinks but Windows servers + * handle it properly */ cifsInode->time = 0; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d2141f10138..8fd2a95860b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -239,7 +239,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, OPEN_REPARSE_POINT, &fid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb); if (!rc) { tmpbuffer = kmalloc(maxpath); rc = CIFSSMBQueryReparseLinkInfo(xid, ptcon, full_path, @@ -704,15 +704,15 @@ static int cifs_filldir(char *find_entry, struct file *file, if (file_info->srch_inf.unicode) { struct nls_table *nlt = cifs_sb->local_nls; + int map_type; + map_type = cifs_remap(cifs_sb); name.name = scratch_buf; name.len = cifs_from_utf16((char *)name.name, (__le16 *)de.name, UNICODE_NAME_MAX, min_t(size_t, de.namelen, - (size_t)max_len), nlt, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + (size_t)max_len), nlt, map_type); name.len -= nls_nullsize(nlt); } else { name.name = de.name; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 52131d8cb4d..d2979036a4c 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -23,6 +23,7 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifspdu.h" +#include "cifs_unicode.h" /* * An NT cancel request header looks just like the original request except: @@ -530,13 +531,11 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, rc = CIFSSMBQPathInfo(xid, tcon, full_path, file_info, 0 /* not legacy */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc == -EOPNOTSUPP || rc == -EINVAL) rc = SMBQueryInformation(xid, tcon, full_path, file_info, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); kfree(file_info); return rc; } @@ -552,8 +551,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); /* * BB optimize code so we do not make the above call when server claims * no NT SMB support and the above call failed at least once - set flag @@ -562,8 +560,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, data, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); *adjustTZ = true; } @@ -611,8 +608,7 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, */ return CIFSGetSrvInodeNumber(xid, tcon, full_path, uniqueid, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); } static int @@ -703,8 +699,7 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; info.Attributes = cpu_to_le32(dosattrs); rc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc == 0) cifsInode->cifsAttrs = dosattrs; } @@ -720,8 +715,7 @@ cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, oparms->create_options, &oparms->fid->netfid, oplock, buf, oparms->cifs_sb->local_nls, - oparms->cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(oparms->cifs_sb)); return CIFS_open(xid, oparms, oplock, buf); } @@ -749,21 +743,21 @@ cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon, } static int -cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, +cifs_sync_read(const unsigned int xid, struct cifs_fid *pfid, struct cifs_io_parms *parms, unsigned int *bytes_read, char **buf, int *buf_type) { - parms->netfid = cfile->fid.netfid; + parms->netfid = pfid->netfid; return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type); } static int -cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, +cifs_sync_write(const unsigned int xid, struct cifs_fid *pfid, struct cifs_io_parms *parms, unsigned int *written, struct kvec *iov, unsigned long nr_segs) { - parms->netfid = cfile->fid.netfid; + parms->netfid = pfid->netfid; return CIFSSMBWrite2(xid, parms, written, iov, nr_segs); } @@ -800,8 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, tcon = tlink_tcon(tlink); rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc == 0) { cinode->cifsAttrs = le32_to_cpu(buf->Attributes); goto out; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 4aa7a0f07d6..1a08a34838f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -379,6 +379,14 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) int len; const char *start_of_path; __le16 *to; + int map_type; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + map_type = SFM_MAP_UNI_RSVD; + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + map_type = SFU_MAP_UNI_RSVD; + else + map_type = NO_MAP_UNI_RSVD; /* Windows doesn't allow paths beginning with \ */ if (from[0] == '\\') @@ -386,9 +394,7 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) else start_of_path = from; to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, map_type); return to; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f522193b718..c5f521bcdee 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -265,15 +265,18 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); - - if ((rc == 0) && (ret_data_len > 0)) { + if (rc != 0) + cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); + else if (ret_data_len < sizeof(struct network_interface_info_ioctl_rsp)) { + cifs_dbg(VFS, "server returned bad net interface info buf\n"); + rc = -EINVAL; + } else { /* Dump info on first interface */ cifs_dbg(FYI, "Adapter Capability 0x%x\t", le32_to_cpu(out_buf->Capability)); cifs_dbg(FYI, "Link Speed %lld\n", le64_to_cpu(out_buf->LinkSpeed)); - } else - cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); + } return rc; } @@ -711,23 +714,23 @@ smb2_read_data_length(char *buf) static int -smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, +smb2_sync_read(const unsigned int xid, struct cifs_fid *pfid, struct cifs_io_parms *parms, unsigned int *bytes_read, char **buf, int *buf_type) { - parms->persistent_fid = cfile->fid.persistent_fid; - parms->volatile_fid = cfile->fid.volatile_fid; + parms->persistent_fid = pfid->persistent_fid; + parms->volatile_fid = pfid->volatile_fid; return SMB2_read(xid, parms, bytes_read, buf, buf_type); } static int -smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, +smb2_sync_write(const unsigned int xid, struct cifs_fid *pfid, struct cifs_io_parms *parms, unsigned int *written, struct kvec *iov, unsigned long nr_segs) { - parms->persistent_fid = cfile->fid.persistent_fid; - parms->volatile_fid = cfile->fid.volatile_fid; + parms->persistent_fid = pfid->persistent_fid; + parms->volatile_fid = pfid->volatile_fid; return SMB2_write(xid, parms, written, iov, nr_segs); } @@ -1452,6 +1455,8 @@ struct smb_version_operations smb21_operations = { .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, .query_symlink = smb2_query_symlink, + .query_mf_symlink = smb3_query_mf_symlink, + .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -1531,6 +1536,8 @@ struct smb_version_operations smb30_operations = { .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, .query_symlink = smb2_query_symlink, + .query_mf_symlink = smb3_query_mf_symlink, + .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 74b3a668438..8f1672bb82d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1098,6 +1098,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (oparms->create_options & CREATE_OPTION_READONLY) file_attributes |= ATTR_READONLY; + if (oparms->create_options & CREATE_OPTION_SPECIAL) + file_attributes |= ATTR_SYSTEM; req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(oparms->desired_access); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index fbe486c285a..e3188abdafd 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -352,6 +352,8 @@ struct smb2_tree_disconnect_rsp { #define FILE_ATTRIBUTE_OFFLINE 0x00001000 #define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 #define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 +#define FILE_ATTRIBUTE_INTEGRITY_STREAM 0x00008000 +#define FILE_ATTRIBUTE_NO_SCRUB_DATA 0x00020000 /* Oplock levels */ #define SMB2_OPLOCK_LEVEL_NONE 0x00 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 67e8ce8055d..79dc650c18b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -82,7 +82,13 @@ extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); - +extern int smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const unsigned char *path, + char *pbuf, unsigned int *pbytes_written); +extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const unsigned char *path, char *pbuf, + unsigned int *pbytes_read); extern int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, FILE_ALL_INFO *buf); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 43eb1367b10..6c1566366a6 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -29,6 +29,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/random.h> +#include "cifs_fs_sb.h" #include "cifs_unicode.h" #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 5ac836a86b1..72a4d10653d 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -28,6 +28,8 @@ #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include "cifs_unicode.h" #define MAX_EA_VALUE_SIZE 65535 #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" @@ -85,8 +87,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) if (pTcon->ses->server->ops->set_EA) rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, ea_name, NULL, (__u16)0, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); } remove_ea_exit: kfree(full_path); @@ -154,8 +155,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, if (pTcon->ses->server->ops->set_EA) rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, ea_name, ea_value, (__u16)value_size, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) @@ -165,8 +165,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, if (pTcon->ses->server->ops->set_EA) rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, ea_name, ea_value, (__u16)value_size, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, strlen(CIFS_XATTR_CIFS_ACL)) == 0) { #ifdef CONFIG_CIFS_ACL @@ -199,8 +198,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, ea_value, (const int)value_size, ACL_TYPE_ACCESS, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_dbg(FYI, "set POSIX ACL rc %d\n", rc); #else cifs_dbg(FYI, "set POSIX ACL not supported\n"); @@ -212,8 +210,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, ea_value, (const int)value_size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_dbg(FYI, "set POSIX default ACL rc %d\n", rc); #else cifs_dbg(FYI, "set default POSIX ACL not supported\n"); @@ -285,8 +282,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, full_path, ea_name, ea_value, buf_size, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; @@ -295,8 +291,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, full_path, ea_name, ea_value, buf_size, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS)) == 0) { #ifdef CONFIG_CIFS_POSIX @@ -304,8 +299,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, ea_value, buf_size, ACL_TYPE_ACCESS, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); #else cifs_dbg(FYI, "Query POSIX ACL not supported yet\n"); #endif /* CONFIG_CIFS_POSIX */ @@ -316,8 +310,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, ea_value, buf_size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); #else cifs_dbg(FYI, "Query POSIX default ACL not supported yet\n"); #endif /* CONFIG_CIFS_POSIX */ @@ -421,8 +414,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, full_path, NULL, data, buf_size, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); list_ea_exit: kfree(full_path); free_xid(xid); diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 9d61947d473..f3f5e72a29b 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -206,7 +206,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) rs = (struct rcom_status *)rc_in->rc_buf; - if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) { + if (!(le32_to_cpu(rs->rs_flags) & DLM_RSF_NEED_SLOTS)) { status = dlm_recover_status(ls); goto do_create; } diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4782e0840dc..04cb830fa09 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -28,6 +28,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index dda4b8667c0..20cffc83046 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -220,11 +220,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t #else VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); - if (rw == READ || rw == KERNEL_READ) - return nfs_file_direct_read(iocb, iter, pos, - rw == READ ? true : false); - return nfs_file_direct_write(iocb, iter, pos, - rw == WRITE ? true : false); + if (rw == READ) + return nfs_file_direct_read(iocb, iter, pos); + return nfs_file_direct_write(iocb, iter, pos); #endif /* CONFIG_NFS_SWAP */ } @@ -510,7 +508,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio) + loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -879,7 +877,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * is no atomic O_APPEND write facility in the NFS protocol. */ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio) + loff_t pos) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4ea92ce0537..2ab6f00dba5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -172,7 +172,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, to, iocb->ki_pos, true); + return nfs_file_direct_read(iocb, to, iocb->ki_pos); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -676,7 +676,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (file->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, from, pos, true); + return nfs_file_direct_write(iocb, from, pos); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, count, (long long) pos); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index abc5056999d..46fab1cb455 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1031,7 +1031,7 @@ filelayout_clear_request_commit(struct nfs_page *req, } out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg_async(freeme); + pnfs_put_lseg_locked(freeme); } static void diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 141c9f4a40d..6388a59f2ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -718,6 +718,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) kfree(new); return res; } +EXPORT_SYMBOL_GPL(nfs_get_lock_context); void nfs_put_lock_context(struct nfs_lock_context *l_ctx) { @@ -730,6 +731,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) spin_unlock(&inode->i_lock); kfree(l_ctx); } +EXPORT_SYMBOL_GPL(nfs_put_lock_context); /** * nfs_close_context - Common close_context() routine NFSv2/v3 diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h new file mode 100644 index 00000000000..d10333a197b --- /dev/null +++ b/fs/nfs/nfs42.h @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com> + */ + +#ifndef __LINUX_FS_NFS_NFS4_2_H +#define __LINUX_FS_NFS_NFS4_2_H + +/* nfs4.2proc.c */ +loff_t nfs42_proc_llseek(struct file *, loff_t, int); + +/* nfs4.2xdr.h */ +extern struct rpc_procinfo nfs4_2_procedures[]; + +#endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c new file mode 100644 index 00000000000..0886f1db591 --- /dev/null +++ b/fs/nfs/nfs42proc.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com> + */ +#include <linux/fs.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/nfs_xdr.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "nfs42.h" + +static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, + fmode_t fmode) +{ + struct nfs_open_context *open; + struct nfs_lock_context *lock; + int ret; + + open = get_nfs_open_context(nfs_file_open_context(file)); + lock = nfs_get_lock_context(open); + if (IS_ERR(lock)) { + put_nfs_open_context(open); + return PTR_ERR(lock); + } + + ret = nfs4_set_rw_stateid(dst, open, lock, fmode); + + nfs_put_lock_context(lock); + put_nfs_open_context(open); + return ret; +} + +loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +{ + struct inode *inode = file_inode(filep); + struct nfs42_seek_args args = { + .sa_fh = NFS_FH(inode), + .sa_offset = offset, + .sa_what = (whence == SEEK_HOLE) ? + NFS4_CONTENT_HOLE : NFS4_CONTENT_DATA, + }; + struct nfs42_seek_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEEK], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + if (!(server->caps & NFS_CAP_SEEK)) + return -ENOTSUPP; + + status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); + if (status) + return status; + + nfs_wb_all(inode); + status = nfs4_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, 0); + if (status == -ENOTSUPP) + server->caps &= ~NFS_CAP_SEEK; + if (status) + return status; + + return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c new file mode 100644 index 00000000000..c90469b604b --- /dev/null +++ b/fs/nfs/nfs42xdr.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com> + */ +#ifndef __LINUX_FS_NFS_NFS4_2XDR_H +#define __LINUX_FS_NFS_NFS4_2XDR_H + +#define encode_seek_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + 2 /* offset */ + \ + 1 /* whence */) +#define decode_seek_maxsz (op_decode_hdr_maxsz + \ + 1 /* eof */ + \ + 1 /* whence */ + \ + 2 /* offset */ + \ + 2 /* length */) + +#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_seek_maxsz) +#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_seek_maxsz) + + +static void encode_seek(struct xdr_stream *xdr, + struct nfs42_seek_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->sa_stateid); + encode_uint64(xdr, args->sa_offset); + encode_uint32(xdr, args->sa_what); +} + +/* + * Encode SEEK request + */ +static void nfs4_xdr_enc_seek(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_seek_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->sa_fh, &hdr); + encode_seek(xdr, args, &hdr); + encode_nops(&hdr); +} + +static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_SEEK); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4 + 8); + if (unlikely(!p)) + goto out_overflow; + + res->sr_eof = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &res->sr_offset); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * Decode SEEK request + */ +static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_seek_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_seek(xdr, res); +out: + return status; +} +#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a8b855ab4e2..be6cac37ea1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,9 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, + struct rpc_message *, struct nfs4_sequence_args *, + struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 3e987ad9ae2..c51fb4db9bf 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -8,6 +8,10 @@ #include "fscache.h" #include "pnfs.h" +#ifdef CONFIG_NFS_V4_2 +#include "nfs42.h" +#endif + #define NFSDBG_FACILITY NFSDBG_FILE static int @@ -115,8 +119,29 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +#ifdef CONFIG_NFS_V4_2 +static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) +{ + loff_t ret; + + switch (whence) { + case SEEK_HOLE: + case SEEK_DATA: + ret = nfs42_proc_llseek(filep, offset, whence); + if (ret != -ENOTSUPP) + return ret; + default: + return nfs_file_llseek(filep, offset, whence); + } +} +#endif /* CONFIG_NFS_V4_2 */ + const struct file_operations nfs4_file_operations = { +#ifdef CONFIG_NFS_V4_2 + .llseek = nfs4_file_llseek, +#else .llseek = nfs_file_llseek, +#endif .read = new_sync_read, .write = new_sync_write, .read_iter = nfs_file_read, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5aa55c132aa..405bd95c1f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -885,7 +885,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } -static int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -8409,7 +8408,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 005d03c5d27..206c08a60c7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7321,6 +7321,10 @@ nfs4_stat_to_errno(int stat) return -stat; } +#ifdef CONFIG_NFS_V4_2 +#include "nfs42xdr.c" +#endif /* CONFIG_NFS_V4_2 */ + #define PROC(proc, argtype, restype) \ [NFSPROC4_CLNT_##proc] = { \ .p_proc = NFSPROC4_COMPOUND, \ @@ -7388,6 +7392,9 @@ struct rpc_procinfo nfs4_procedures[] = { enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), #endif /* CONFIG_NFS_V4_1 */ +#ifdef CONFIG_NFS_V4_2 + PROC(SEEK, enc_seek, dec_seek), +#endif /* CONFIG_NFS_V4_2 */ }; const struct rpc_version nfs_version4 = { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 94e16ec8831..ed0db61f854 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -526,7 +526,8 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); */ void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - put_nfs_open_context(hdr->args.context); + if (hdr->args.context) + put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); } @@ -751,12 +752,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - if (WARN_ON_ONCE(pageused >= pagecount)) - return nfs_pgio_error(desc, hdr); - if (!last_page || last_page != req->wb_page) { - *pages++ = last_page = req->wb_page; pageused++; + if (pageused > pagecount) + break; + *pages++ = last_page = req->wb_page; } } if (WARN_ON_ONCE(pageused != pagecount)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76de7f56811..0a5dda4d85c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,22 +361,43 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -static void pnfs_put_lseg_async_work(struct work_struct *work) +static void pnfs_free_lseg_async_work(struct work_struct *work) { struct pnfs_layout_segment *lseg; + struct pnfs_layout_hdr *lo; lseg = container_of(work, struct pnfs_layout_segment, pls_work); + lo = lseg->pls_layout; - pnfs_put_lseg(lseg); + pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } -void -pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) { - INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work); + INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); schedule_work(&lseg->pls_work); } -EXPORT_SYMBOL_GPL(pnfs_put_lseg_async); + +void +pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) +{ + if (!lseg) + return; + + assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); + + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, + atomic_read(&lseg->pls_refcount), + test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + if (atomic_dec_and_test(&lseg->pls_refcount)) { + struct pnfs_layout_hdr *lo = lseg->pls_layout; + pnfs_get_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); + pnfs_free_lseg_async(lseg); + } +} +EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); static u64 end_offset(u64 start, u64 len) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 693ce42ec68..9ae5b765b07 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -190,7 +190,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg); +void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -270,7 +270,6 @@ nfs4_find_get_deviceid(struct nfs_server *server, void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); @@ -446,10 +445,6 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } -static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) -{ -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 30206b23843..36ae529511c 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -ccflags-y := -DNTFS_VERSION=\"2.1.30\" +ccflags-y := -DNTFS_VERSION=\"2.1.31\" ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index d267ea6aa1a..7521e11db72 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1,8 +1,7 @@ /** * aops.c - NTFS kernel address space operations and page cache handling. - * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1539,16 +1538,157 @@ err_out: #endif /* NTFS_RW */ /** - * ntfs_aops - general address space operations for inodes and attributes + * ntfs_bmap - map logical file block to physical device block + * @mapping: address space mapping to which the block to be mapped belongs + * @block: logical block to map to its physical device block + * + * For regular, non-resident files (i.e. not compressed and not encrypted), map + * the logical @block belonging to the file described by the address space + * mapping @mapping to its physical device block. + * + * The size of the block is equal to the @s_blocksize field of the super block + * of the mounted file system which is guaranteed to be smaller than or equal + * to the cluster size thus the block is guaranteed to fit entirely inside the + * cluster which means we do not need to care how many contiguous bytes are + * available after the beginning of the block. + * + * Return the physical device block if the mapping succeeded or 0 if the block + * is sparse or there was an error. + * + * Note: This is a problem if someone tries to run bmap() on $Boot system file + * as that really is in block zero but there is nothing we can do. bmap() is + * just broken in that respect (just like it cannot distinguish sparse from + * not available or error). */ -const struct address_space_operations ntfs_aops = { - .readpage = ntfs_readpage, /* Fill page with data. */ +static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) +{ + s64 ofs, size; + loff_t i_size; + LCN lcn; + unsigned long blocksize, flags; + ntfs_inode *ni = NTFS_I(mapping->host); + ntfs_volume *vol = ni->vol; + unsigned delta; + unsigned char blocksize_bits, cluster_size_shift; + + ntfs_debug("Entering for mft_no 0x%lx, logical block 0x%llx.", + ni->mft_no, (unsigned long long)block); + if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni)) { + ntfs_error(vol->sb, "BMAP does not make sense for %s " + "attributes, returning 0.", + (ni->type != AT_DATA) ? "non-data" : + (!NInoNonResident(ni) ? "resident" : + "encrypted")); + return 0; + } + /* None of these can happen. */ + BUG_ON(NInoCompressed(ni)); + BUG_ON(NInoMstProtected(ni)); + blocksize = vol->sb->s_blocksize; + blocksize_bits = vol->sb->s_blocksize_bits; + ofs = (s64)block << blocksize_bits; + read_lock_irqsave(&ni->size_lock, flags); + size = ni->initialized_size; + i_size = i_size_read(VFS_I(ni)); + read_unlock_irqrestore(&ni->size_lock, flags); + /* + * If the offset is outside the initialized size or the block straddles + * the initialized size then pretend it is a hole unless the + * initialized size equals the file size. + */ + if (unlikely(ofs >= size || (ofs + blocksize > size && size < i_size))) + goto hole; + cluster_size_shift = vol->cluster_size_bits; + down_read(&ni->runlist.lock); + lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ofs >> cluster_size_shift, false); + up_read(&ni->runlist.lock); + if (unlikely(lcn < LCN_HOLE)) { + /* + * Step down to an integer to avoid gcc doing a long long + * comparision in the switch when we know @lcn is between + * LCN_HOLE and LCN_EIO (i.e. -1 to -5). + * + * Otherwise older gcc (at least on some architectures) will + * try to use __cmpdi2() which is of course not available in + * the kernel. + */ + switch ((int)lcn) { + case LCN_ENOENT: + /* + * If the offset is out of bounds then pretend it is a + * hole. + */ + goto hole; + case LCN_ENOMEM: + ntfs_error(vol->sb, "Not enough memory to complete " + "mapping for inode 0x%lx. " + "Returning 0.", ni->mft_no); + break; + default: + ntfs_error(vol->sb, "Failed to complete mapping for " + "inode 0x%lx. Run chkdsk. " + "Returning 0.", ni->mft_no); + break; + } + return 0; + } + if (lcn < 0) { + /* It is a hole. */ +hole: + ntfs_debug("Done (returning hole)."); + return 0; + } + /* + * The block is really allocated and fullfils all our criteria. + * Convert the cluster to units of block size and return the result. + */ + delta = ofs & vol->cluster_size_mask; + if (unlikely(sizeof(block) < sizeof(lcn))) { + block = lcn = ((lcn << cluster_size_shift) + delta) >> + blocksize_bits; + /* If the block number was truncated return 0. */ + if (unlikely(block != lcn)) { + ntfs_error(vol->sb, "Physical block 0x%llx is too " + "large to be returned, returning 0.", + (long long)lcn); + return 0; + } + } else + block = ((lcn << cluster_size_shift) + delta) >> + blocksize_bits; + ntfs_debug("Done (returning block 0x%llx).", (unsigned long long)lcn); + return block; +} + +/** + * ntfs_normal_aops - address space operations for normal inodes and attributes + * + * Note these are not used for compressed or mst protected inodes and + * attributes. + */ +const struct address_space_operations ntfs_normal_aops = { + .readpage = ntfs_readpage, #ifdef NTFS_RW - .writepage = ntfs_writepage, /* Write dirty page to disk. */ + .writepage = ntfs_writepage, + .set_page_dirty = __set_page_dirty_buffers, +#endif /* NTFS_RW */ + .bmap = ntfs_bmap, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, +}; + +/** + * ntfs_compressed_aops - address space operations for compressed inodes + */ +const struct address_space_operations ntfs_compressed_aops = { + .readpage = ntfs_readpage, +#ifdef NTFS_RW + .writepage = ntfs_writepage, + .set_page_dirty = __set_page_dirty_buffers, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, /* Move a page cache page from - one physical page to an - other. */ + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1564,9 +1704,8 @@ const struct address_space_operations ntfs_mst_aops = { without touching the buffers belonging to the page. */ #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, /* Move a page cache page from - one physical page to an - other. */ + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index f47af5e6e23..898b9949d36 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,7 +1,7 @@ /** - * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. + * inode.c - NTFS kernel inode handling. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -1012,6 +1012,7 @@ skip_large_dir_stuff: /* Setup the operations for this inode. */ vi->i_op = &ntfs_dir_inode_ops; vi->i_fop = &ntfs_dir_ops; + vi->i_mapping->a_ops = &ntfs_mst_aops; } else { /* It is a file. */ ntfs_attr_reinit_search_ctx(ctx); @@ -1160,11 +1161,12 @@ no_data_attr_special_case: /* Setup the operations for this inode. */ vi->i_op = &ntfs_file_inode_ops; vi->i_fop = &ntfs_file_ops; + vi->i_mapping->a_ops = &ntfs_normal_aops; + if (NInoMstProtected(ni)) + vi->i_mapping->a_ops = &ntfs_mst_aops; + else if (NInoCompressed(ni)) + vi->i_mapping->a_ops = &ntfs_compressed_aops; } - if (NInoMstProtected(ni)) - vi->i_mapping->a_ops = &ntfs_mst_aops; - else - vi->i_mapping->a_ops = &ntfs_aops; /* * The number of 512-byte blocks used on disk (for stat). This is in so * far inaccurate as it doesn't account for any named streams or other @@ -1414,10 +1416,11 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ni->allocated_size = sle64_to_cpu( a->data.non_resident.allocated_size); } + vi->i_mapping->a_ops = &ntfs_normal_aops; if (NInoMstProtected(ni)) vi->i_mapping->a_ops = &ntfs_mst_aops; - else - vi->i_mapping->a_ops = &ntfs_aops; + else if (NInoCompressed(ni)) + vi->i_mapping->a_ops = &ntfs_compressed_aops; if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT) vi->i_blocks = ni->itype.compressed.size >> 9; else diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index d6a340bf80f..c581e26a350 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -1,8 +1,7 @@ /* - * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS - * project. + * ntfs.h - Defines for NTFS Linux kernel driver. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * Copyright (C) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -57,7 +56,8 @@ extern struct kmem_cache *ntfs_attr_ctx_cache; extern struct kmem_cache *ntfs_index_ctx_cache; /* The various operations structs defined throughout the driver files. */ -extern const struct address_space_operations ntfs_aops; +extern const struct address_space_operations ntfs_normal_aops; +extern const struct address_space_operations ntfs_compressed_aops; extern const struct address_space_operations ntfs_mst_aops; extern const struct file_operations ntfs_file_ops; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 192297b0090..fafb7a02a5d 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -320,10 +320,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, compressed ? ".enc.z" : ""); break; case PSTORE_TYPE_CONSOLE: - sprintf(name, "console-%s", psname); + sprintf(name, "console-%s-%lld", psname, id); break; case PSTORE_TYPE_FTRACE: - sprintf(name, "ftrace-%s", psname); + sprintf(name, "ftrace-%s-%lld", psname, id); break; case PSTORE_TYPE_MCE: sprintf(name, "mce-%s-%lld", psname, id); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 017b6afe340..24b4ebea0d4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1670,8 +1670,6 @@ xfs_alloc_buftarg( btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e488e9459a9..5da6012b7a1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,12 +28,10 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ BDI_writeback_running, /* Writeback is in progress */ - BDI_unused, /* Available bits start here */ }; typedef int (congested_fn)(void *, int); @@ -50,7 +48,6 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ - unsigned int nr; unsigned long last_old_flush; /* last old data flush */ @@ -124,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/include/linux/bio.h b/include/linux/bio.h index b39e5000ff5..7347f486cec 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -292,7 +292,24 @@ static inline unsigned bio_segments(struct bio *bio) */ #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +enum bip_flags { + BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ + BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ + BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ + BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ +}; + #if defined(CONFIG_BLK_DEV_INTEGRITY) + +static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) +{ + if (bio->bi_rw & REQ_INTEGRITY) + return bio->bi_integrity; + + return NULL; +} + /* * bio integrity payload */ @@ -301,21 +318,40 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - /* kill - should just use bip_vec */ - void *bip_buf; /* generated integrity data */ - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ - unsigned bip_owns_buf:1; /* should free bip_buf */ + unsigned short bip_flags; /* control flags */ struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; + +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + + if (bip) + return bip->bip_flags & flag; + + return false; +} + +static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) +{ + return bip->bip_iter.bi_sector; +} + +static inline void bip_set_seed(struct bio_integrity_payload *bip, + sector_t seed) +{ + bip->bip_iter.bi_sector = seed; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ extern void bio_trim(struct bio *bio, int offset, int size); @@ -342,6 +378,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, } extern struct bio_set *bioset_create(unsigned int, unsigned int); +extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); @@ -353,7 +390,6 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio_set *fs_bio_set; -unsigned int bio_integrity_tag_size(struct bio *bio); static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { @@ -661,14 +697,10 @@ struct biovec_slab { for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) -#define bio_integrity(bio) (bio->bi_integrity != NULL) - extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern int bio_integrity_enabled(struct bio *bio); -extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); -extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); +extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); @@ -680,14 +712,14 @@ extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline int bio_integrity(struct bio *bio) +static inline void *bio_integrity(struct bio *bio) { - return 0; + return NULL; } -static inline int bio_integrity_enabled(struct bio *bio) +static inline bool bio_integrity_enabled(struct bio *bio) { - return 0; + return false; } static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) @@ -733,6 +765,11 @@ static inline void bio_integrity_init(void) return; } +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + return false; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLOCK */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c13a0c09fae..c9be1589415 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -4,6 +4,7 @@ #include <linux/blkdev.h> struct blk_mq_tags; +struct blk_flush_queue; struct blk_mq_cpu_notifier { struct list_head list; @@ -34,6 +35,7 @@ struct blk_mq_hw_ctx { struct request_queue *queue; unsigned int queue_num; + struct blk_flush_queue *fq; void *driver_data; @@ -77,8 +79,9 @@ struct blk_mq_tag_set { struct list_head tag_list; }; -typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); +typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); +typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_request_fn)(void *, struct request *, unsigned int, @@ -86,6 +89,9 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int, typedef void (exit_request_fn)(void *, struct request *, unsigned int, unsigned int); +typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, + bool); + struct blk_mq_ops { /* * Queue request @@ -100,7 +106,7 @@ struct blk_mq_ops { /* * Called on request timeout */ - rq_timed_out_fn *timeout; + timeout_fn *timeout; softirq_done_fn *complete; @@ -115,6 +121,10 @@ struct blk_mq_ops { /* * Called for every command allocated by the block layer to allow * the driver to set up driver specific data. + * + * Tag greater than or equal to queue_depth is for setting up + * flush request. + * * Ditto for exit/teardown. */ init_request_fn *init_request; @@ -160,8 +170,9 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); -void blk_mq_end_io(struct request *rq, int error); -void __blk_mq_end_io(struct request *rq, int error); +void blk_mq_start_request(struct request *rq); +void blk_mq_end_request(struct request *rq, int error); +void __blk_mq_end_request(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); @@ -174,7 +185,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); +void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, + void *priv); /* * Driver command data is immediately after the request. So subtract request diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 66c2167f04a..445d59231bc 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -78,9 +78,11 @@ struct bio { struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; #endif + union { #if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ + struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif + }; unsigned short bi_vcnt; /* how many bio_vec's */ @@ -118,10 +120,8 @@ struct bio { #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ -#define BIO_QUIET 10 /* Make BIO Quiet */ -#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ -#define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */ +#define BIO_QUIET 9 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ /* * Flags starting here get preserved by bio_reset() - this includes @@ -162,6 +162,7 @@ enum rq_flag_bits { __REQ_WRITE_SAME, /* write same block many times */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_FLUSH, /* request for cache flush */ @@ -186,9 +187,7 @@ enum rq_flag_bits { __REQ_FLUSH_SEQ, /* request for flush sequence */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_KERNEL, /* direct IO to kernel pages */ __REQ_PM, /* runtime pm request */ - __REQ_END, /* last of chain of requests */ __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NR_BITS, /* stops here */ @@ -204,13 +203,14 @@ enum rq_flag_bits { #define REQ_DISCARD (1ULL << __REQ_DISCARD) #define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME) #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) +#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ - REQ_SECURE) + REQ_SECURE | REQ_INTEGRITY) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) @@ -240,9 +240,7 @@ enum rq_flag_bits { #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) #define REQ_SECURE (1ULL << __REQ_SECURE) -#define REQ_KERNEL (1ULL << __REQ_KERNEL) #define REQ_PM (1ULL << __REQ_PM) -#define REQ_END (1ULL << __REQ_END) #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 87be398166d..0207a78a8d8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -36,6 +36,7 @@ struct request; struct sg_io_hdr; struct bsg_job; struct blkcg_gq; +struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -455,14 +456,7 @@ struct request_queue { */ unsigned int flush_flags; unsigned int flush_not_queueable:1; - unsigned int flush_queue_delayed:1; - unsigned int flush_pending_idx:1; - unsigned int flush_running_idx:1; - unsigned long flush_pending_since; - struct list_head flush_queue[2]; - struct list_head flush_data_in_flight; - struct request *flush_rq; - spinlock_t mq_flush_lock; + struct blk_flush_queue *fq; struct list_head requeue_list; spinlock_t requeue_lock; @@ -865,7 +859,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* @@ -1285,10 +1279,9 @@ static inline int queue_alignment_offset(struct request_queue *q) static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = (sector << 9) & (granularity - 1); + unsigned int alignment = sector_div(sector, granularity >> 9) << 9; - return (granularity + lim->alignment_offset - alignment) - & (granularity - 1); + return (granularity + lim->alignment_offset - alignment) % granularity; } static inline int bdev_alignment_offset(struct block_device *bdev) @@ -1464,32 +1457,31 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ -#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ +enum blk_integrity_flags { + BLK_INTEGRITY_VERIFY = 1 << 0, + BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, + BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, +}; -struct blk_integrity_exchg { +struct blk_integrity_iter { void *prot_buf; void *data_buf; - sector_t sector; + sector_t seed; unsigned int data_size; - unsigned short sector_size; + unsigned short interval; const char *disk_name; }; -typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); -typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); -typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); -typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); +typedef int (integrity_processing_fn) (struct blk_integrity_iter *); struct blk_integrity { - integrity_gen_fn *generate_fn; - integrity_vrfy_fn *verify_fn; - integrity_set_tag_fn *set_tag_fn; - integrity_get_tag_fn *get_tag_fn; + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; unsigned short flags; unsigned short tuple_size; - unsigned short sector_size; + unsigned short interval; unsigned short tag_size; const char *name; @@ -1504,10 +1496,10 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -extern int blk_integrity_merge_rq(struct request_queue *, struct request *, - struct request *); -extern int blk_integrity_merge_bio(struct request_queue *, struct request *, - struct bio *); +extern bool blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1520,12 +1512,9 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) return disk->integrity; } -static inline int blk_integrity_rq(struct request *rq) +static inline bool blk_integrity_rq(struct request *rq) { - if (rq->bio == NULL) - return 0; - - return bio_integrity(rq->bio); + return rq->cmd_flags & REQ_INTEGRITY; } static inline void blk_queue_max_integrity_segments(struct request_queue *q, @@ -1590,15 +1579,15 @@ static inline unsigned short queue_max_integrity_segments(struct request_queue * { return 0; } -static inline int blk_integrity_merge_rq(struct request_queue *rq, - struct request *r1, - struct request *r2) +static inline bool blk_integrity_merge_rq(struct request_queue *rq, + struct request *r1, + struct request *r2) { return 0; } -static inline int blk_integrity_merge_bio(struct request_queue *rq, - struct request *r, - struct bio *b) +static inline bool blk_integrity_merge_bio(struct request_queue *rq, + struct request *r, + struct bio *b) { return 0; } diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index b3cb71f0d3b..cf53d0773ce 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -6,7 +6,8 @@ #define CRC_T10DIF_DIGEST_SIZE 2 #define CRC_T10DIF_BLOCK_SIZE 1 -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); -__u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, + size_t len); +extern __u16 crc_t10dif(unsigned char const *, size_t); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index ab4f1a10da2..a957d4366c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -192,8 +192,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define KERNEL_READ (READ|REQ_KERNEL) -#define KERNEL_WRITE (WRITE|REQ_KERNEL) #define READ_SYNC (READ | REQ_SYNC) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index c0894dd8827..667c31101b8 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -178,12 +178,12 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \ nla = ntb[attr_nr]; \ if (nla) { \ - if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + if (exclude_invariants && !!((attr_flag) & DRBD_F_INVARIANT)) { \ pr_info("<< must not change invariant attr: %s\n", #name); \ return -EEXIST; \ } \ assignment; \ - } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + } else if (exclude_invariants && !!((attr_flag) & DRBD_F_INVARIANT)) { \ /* attribute missing from payload, */ \ /* which was expected */ \ } else if ((attr_flag) & DRBD_F_REQUIRED) { \ diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h index 9be37da9368..e985ba679c4 100644 --- a/include/linux/kernelcapi.h +++ b/include/linux/kernelcapi.h @@ -41,7 +41,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 buf[CAPI_MANUFACTURER_LEN]); u16 capi20_get_version(u32 contr, struct capi_version *verp); u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN]); u16 capi20_get_profile(u32 contr, struct capi_profile *profp); -int capi20_manufacturer(unsigned int cmd, void __user *data); +int capi20_manufacturer(unsigned long cmd, void __user *data); #define CAPICTR_UP 0 #define CAPICTR_DOWN 1 diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 37ef6b19408..299d7d31fe5 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -153,7 +153,7 @@ struct cfi_ident { uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; uint32_t EraseRegionInfo[0]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; /* Extended Query Structure for both PRI and ALT */ @@ -161,7 +161,7 @@ struct cfi_extquery { uint8_t pri[3]; uint8_t MajorVersion; uint8_t MinorVersion; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ @@ -180,7 +180,7 @@ struct cfi_pri_intelext { uint8_t FactProtRegSize; uint8_t UserProtRegSize; uint8_t extra[0]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_otpinfo { uint32_t ProtRegAddr; @@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo { uint8_t FactProtRegSize; uint16_t UserGroups; uint8_t UserProtRegSize; -} __attribute__((packed)); +} __packed; struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; @@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo { uint16_t MinBlockEraseCycles; uint8_t BitsPerCell; uint8_t BlockCap; -} __attribute__((packed)); +} __packed; struct cfi_intelext_regioninfo { uint16_t NumIdentPartitions; @@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo { uint8_t NumOpAllowedSimEraMode; uint8_t NumBlockTypes; struct cfi_intelext_blockinfo BlockTypes[1]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_programming_regioninfo { uint8_t ProgRegShift; @@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo { uint8_t Reserved2; uint8_t ControlInvalid; uint8_t Reserved3; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ @@ -233,7 +233,7 @@ struct cfi_pri_amdstd { uint8_t VppMin; uint8_t VppMax; uint8_t TopBottom; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ @@ -245,18 +245,18 @@ struct cfi_pri_atmel { uint8_t BottomBoot; uint8_t BurstMode; uint8_t PageMode; -} __attribute__((packed)); +} __packed; struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; struct cfi_bri_query { uint8_t PageModeReadCap; uint8_t NumFields; uint32_t ConfField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; #define P_ID_NONE 0x0000 #define P_ID_INTEL_EXT 0x0001 diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c300db3ae28..e4d451e4600 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -587,6 +587,11 @@ struct nand_buffers { * @ecc_step_ds: [INTERN] ECC step required by the @ecc_strength_ds, * also from the datasheet. It is the recommended ECC step * size, if known; if unknown, set to zero. + * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is + * either deduced from the datasheet if the NAND + * chip is not ONFI compliant or set to 0 if it is + * (an ONFI chip is always configured in mode 0 + * after a NAND reset) * @numchips: [INTERN] number of physical chips * @chipsize: [INTERN] the size of one chip for multichip arrays * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 @@ -671,6 +676,7 @@ struct nand_chip { uint8_t bits_per_cell; uint16_t ecc_strength_ds; uint16_t ecc_step_ds; + int onfi_timing_mode_default; int badblockpos; int badblockbits; @@ -766,12 +772,17 @@ struct nand_chip { * @options: stores various chip bit options * @id_len: The valid length of the @id. * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. * @ecc.strength_ds: The ECC correctability from the datasheet, same as the * @ecc_strength_ds in nand_chip{}. * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the * @ecc_step_ds in nand_chip{}, also from the datasheet. * For example, the "4bit ECC for each 512Byte" can be set with * NAND_ECC_INFO(4, 512). + * @onfi_timing_mode_default: the default ONFI timing mode entered after a NAND + * reset. Should be deduced from timings described + * in the datasheet. + * */ struct nand_flash_dev { char *name; @@ -792,6 +803,7 @@ struct nand_flash_dev { uint16_t strength_ds; uint16_t step_ds; } ecc; + int onfi_timing_mode_default; }; /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 838407aea70..74fd5d37f15 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -998,6 +998,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. + * bool (*ndo_gso_check) (struct sk_buff *skb, + * struct net_device *dev); + * Called by core transmit path to determine if device is capable of + * performing GSO on a packet. The device returns true if it is + * able to GSO the packet, false otherwise. If the return value is + * false the stack will do software GSO. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1147,6 +1153,8 @@ struct net_device_ops { struct net_device *dev, void *priv); int (*ndo_get_lock_subclass)(struct net_device *dev); + bool (*ndo_gso_check) (struct sk_buff *skb, + struct net_device *dev); }; /** @@ -3572,10 +3580,12 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline bool netif_needs_gso(struct sk_buff *skb, +static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || + (dev->netdev_ops->ndo_gso_check && + !dev->netdev_ops->ndo_gso_check(skb, dev)) || unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && (skb->ip_summed != CHECKSUM_UNNECESSARY))); } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 026b0c042c4..356acc2846f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -487,6 +487,9 @@ enum { NFSPROC4_CLNT_GETDEVICELIST, NFSPROC4_CLNT_BIND_CONN_TO_SESSION, NFSPROC4_CLNT_DESTROY_CLIENTID, + + /* nfs42 */ + NFSPROC4_CLNT_SEEK, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 28d649054d5..c72d1ad41ad 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -448,10 +448,10 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio); + loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio); + loff_t pos); /* * linux/fs/nfs/dir.c diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 922be2e050f..a32ba0d7a98 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -230,5 +230,6 @@ struct nfs_server { #define NFS_CAP_STATEID_NFSV41 (1U << 16) #define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #define NFS_CAP_SECURITY_LABEL (1U << 18) +#define NFS_CAP_SEEK (1U << 19) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6951c7d9097..983876f24ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1231,6 +1231,25 @@ struct pnfs_ds_commit_info { #endif /* CONFIG_NFS_V4_1 */ +#ifdef CONFIG_NFS_V4_2 +struct nfs42_seek_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *sa_fh; + nfs4_stateid sa_stateid; + u64 sa_offset; + u32 sa_what; +}; + +struct nfs42_seek_res { + struct nfs4_sequence_res seq_res; + unsigned int status; + + u32 sr_eof; + u64 sr_offset; +}; +#endif + struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 780d1e97f62..b8686c00f15 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -42,8 +42,24 @@ struct elm_errorvec { int error_loc[16]; }; +#if IS_ENABLED(CONFIG_MTD_NAND_OMAP_BCH) void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, struct elm_errorvec *err_vec); int elm_config(struct device *dev, enum bch_ecc bch_type, int ecc_steps, int ecc_step_size, int ecc_syndrome_size); +#else +static inline void +elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, + struct elm_errorvec *err_vec) +{ +} + +static inline int elm_config(struct device *dev, enum bch_ecc bch_type, + int ecc_steps, int ecc_step_size, + int ecc_syndrome_size) +{ + return -ENOSYS; +} +#endif /* CONFIG_MTD_NAND_ECC_BCH */ + #endif /* __ELM_H */ diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 16ec262dfcc..090bbab0130 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -71,6 +71,7 @@ struct omap_nand_platform_data { struct mtd_partition *parts; int nr_parts; bool dev_ready; + bool flash_bbt; enum nand_io xfer_type; int devsize; enum omap_ecc ecc_opt; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ab0749d687..a59d9343c25 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,7 +1203,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ + + /* Free our shared copy */ + if (likely(nskb)) + consume_skb(skb); + else + kfree_skb(skb); skb = nskb; } return skb; diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h new file mode 100644 index 00000000000..6a8b9942632 --- /dev/null +++ b/include/linux/t10-pi.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_T10_PI_H +#define _LINUX_T10_PI_H + +#include <linux/types.h> +#include <linux/blkdev.h> + +/* + * T10 Protection Information tuple. + */ +struct t10_pi_tuple { + __be16 guard_tag; /* Checksum */ + __be16 app_tag; /* Opaque storage */ + __be32 ref_tag; /* Target LBA or indirect LBA */ +}; + + +extern struct blk_integrity t10_pi_type1_crc; +extern struct blk_integrity t10_pi_type1_ip; +extern struct blk_integrity t10_pi_type3_crc; +extern struct blk_integrity t10_pi_type3_ip; + +#endif diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b46671e28de..65261a7244f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -78,6 +78,10 @@ bool virtqueue_is_broken(struct virtqueue *vq); /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus + * @failed: saved value for CONFIG_S_FAILED bit (for restore) + * @config_enabled: configuration change reporting enabled + * @config_change_pending: configuration change reported while disabled + * @config_lock: protects configuration change reporting * @dev: underlying device. * @id: the device type identification (used to match it with a driver). * @config: the configuration ops for this device. @@ -88,6 +92,10 @@ bool virtqueue_is_broken(struct virtqueue *vq); */ struct virtio_device { int index; + bool failed; + bool config_enabled; + bool config_change_pending; + spinlock_t config_lock; struct device dev; struct virtio_device_id id; const struct virtio_config_ops *config; @@ -108,6 +116,12 @@ void unregister_virtio_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev); +void virtio_config_changed(struct virtio_device *dev); +#ifdef CONFIG_PM_SLEEP +int virtio_device_freeze(struct virtio_device *dev); +int virtio_device_restore(struct virtio_device *dev); +#endif + /** * virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index e8f8f71e843..7f4ef66873e 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -109,6 +109,23 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, return vq; } +/** + * virtio_device_ready - enable vq use in probe function + * @vdev: the device + * + * Driver must call this to use vqs in the probe function. + * + * Note: vqs are enabled automatically after probe returns. + */ +static inline +void virtio_device_ready(struct virtio_device *dev) +{ + unsigned status = dev->config->get_status(dev); + + BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); + dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); +} + static inline const char *virtio_bus_name(struct virtio_device *vdev) { diff --git a/include/net/dsa.h b/include/net/dsa.h index 58ad8c6492d..b7655929353 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -18,6 +18,7 @@ #include <linux/of.h> #include <linux/phy.h> #include <linux/phy_fixed.h> +#include <linux/ethtool.h> enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 55a8d4056cc..98e5f9578f8 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -146,7 +146,6 @@ struct ifacaddr6 { struct ifacaddr6 *aca_next; int aca_users; atomic_t aca_refcnt; - spinlock_t aca_lock; unsigned long aca_cstamp; unsigned long aca_tstamp; }; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index ae061354430..d1d272843b3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -80,7 +80,8 @@ static inline struct sock *__inet6_lookup(struct net *net, static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, const __be16 sport, - const __be16 dport) + const __be16 dport, + int iif) { struct sock *sk = skb_steal_sock(skb); @@ -90,7 +91,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + iif); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/netlink.h b/include/net/netlink.h index 6c1076275aa..7b903e1bdbb 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -431,7 +431,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in - * @portid: netlink process id + * @portid: netlink PORTID of requesting application * @seq: sequence number of message * @type: message type * @payload: length of message payload diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 9fbd856e671..856f01cb51d 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -426,6 +426,11 @@ static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_associat asoc->pmtu_pending = 0; } +static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) +{ + return !list_empty(&chunk->list); +} + /* Walk through a list of TLV parameters. Don't trust the * individual parameter lengths and instead depend on * the chunk length to indicate when to stop. Make sure diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 7f4eeb340a5..72a31db47de 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -248,9 +248,9 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, int, __be16); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp); +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp); struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf); int sctp_process_asconf_ack(struct sctp_association *asoc, diff --git a/include/net/tcp.h b/include/net/tcp.h index 74efeda994b..c9766f89deb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -468,8 +468,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, - struct ip_options *opt); +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. @@ -730,6 +729,15 @@ struct tcp_skb_cb { #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) + +/* This is the variant of inet6_iif() that must be used by TCP, + * as TCP moves IP6CB into a different location in skb->cb[] + */ +static inline int tcp_v6_iif(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->header.h6.iif; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ @@ -1666,4 +1674,24 @@ int tcpv4_offload_init(void); void tcp_v4_init(void); void tcp_init(void); +/* + * Save and compile IPv4 options, return a pointer to it + */ +static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) +{ + const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + struct ip_options_rcu *dopt = NULL; + + if (opt->optlen) { + int opt_size = sizeof(*dopt) + opt->optlen; + + dopt = kmalloc(opt_size, GFP_ATOMIC); + if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + kfree(dopt); + dopt = NULL; + } + } + return dopt; +} + #endif /* _TCP_H */ diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 73f34904494..522a5f27f55 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -10,9 +10,10 @@ #include <scsi/scsi_device.h> struct Scsi_Host; -struct scsi_device; struct scsi_driver; +#include <scsi/scsi_device.h> + /* * MAX_COMMAND_SIZE is: * The longest fixed-length SCSI CDB as per the SCSI standard. @@ -81,6 +82,7 @@ struct scsi_cmnd { unsigned char prot_op; unsigned char prot_type; + unsigned char prot_flags; unsigned short cmd_len; enum dma_data_direction sc_data_direction; @@ -252,6 +254,14 @@ static inline unsigned char scsi_get_prot_op(struct scsi_cmnd *scmd) return scmd->prot_op; } +enum scsi_prot_flags { + SCSI_PROT_TRANSFER_PI = 1 << 0, + SCSI_PROT_GUARD_CHECK = 1 << 1, + SCSI_PROT_REF_CHECK = 1 << 2, + SCSI_PROT_REF_INCREMENT = 1 << 3, + SCSI_PROT_IP_CHECKSUM = 1 << 4, +}; + /* * The controller usually does not know anything about the target it * is communicating with. However, when DIX is enabled the controller @@ -280,6 +290,17 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) return blk_rq_pos(scmd->request); } +static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd) +{ + return scmd->device->sector_size; +} + +static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd) +{ + return blk_rq_pos(scmd->request) >> + (ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff; +} + static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd) { return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0; @@ -316,17 +337,12 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status) static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) { unsigned int xfer_len = scsi_out(scmd)->length; - unsigned int prot_op = scsi_get_prot_op(scmd); - unsigned int sector_size = scmd->device->sector_size; + unsigned int prot_interval = scsi_prot_interval(scmd); - switch (prot_op) { - case SCSI_PROT_NORMAL: - case SCSI_PROT_WRITE_STRIP: - case SCSI_PROT_READ_INSERT: - return xfer_len; - } + if (scmd->prot_flags & SCSI_PROT_TRANSFER_PI) + xfer_len += (xfer_len >> ilog2(prot_interval)) * 8; - return xfer_len + (xfer_len >> ilog2(sector_size)) * 8; + return xfer_len; } #endif /* _SCSI_SCSI_CMND_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 3cc8e1c2b99..6cad97485ba 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -68,6 +68,7 @@ header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h header-y += bpf.h +header-y += bpf_common.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 31b0ac208a5..d18316f9e9c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -8,6 +8,7 @@ #define _UAPI__LINUX_BPF_H__ #include <linux/types.h> +#include <linux/bpf_common.h> /* Extended instruction set based on top of classic BPF */ diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h new file mode 100644 index 00000000000..a5c220e0828 --- /dev/null +++ b/include/uapi/linux/bpf_common.h @@ -0,0 +1,55 @@ +#ifndef _UAPI__LINUX_BPF_COMMON_H__ +#define _UAPI__LINUX_BPF_COMMON_H__ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 +#define BPF_H 0x08 +#define BPF_B 0x10 +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +#ifndef BPF_MAXINSNS +#define BPF_MAXINSNS 4096 +#endif + +#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index c8a4302093a..3315ab21f72 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 27 +#define DM_VERSION_MINOR 28 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2013-10-30)" +#define DM_VERSION_EXTRA "-ioctl (2014-09-17)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 253b4d42cf2..47785d5ecf1 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -7,7 +7,7 @@ #include <linux/compiler.h> #include <linux/types.h> - +#include <linux/bpf_common.h> /* * Current version of the filter code architecture. @@ -32,56 +32,6 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ struct sock_filter __user *filter; }; -/* - * Instruction classes - */ - -#define BPF_CLASS(code) ((code) & 0x07) -#define BPF_LD 0x00 -#define BPF_LDX 0x01 -#define BPF_ST 0x02 -#define BPF_STX 0x03 -#define BPF_ALU 0x04 -#define BPF_JMP 0x05 -#define BPF_RET 0x06 -#define BPF_MISC 0x07 - -/* ld/ldx fields */ -#define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 -#define BPF_MODE(code) ((code) & 0xe0) -#define BPF_IMM 0x00 -#define BPF_ABS 0x20 -#define BPF_IND 0x40 -#define BPF_MEM 0x60 -#define BPF_LEN 0x80 -#define BPF_MSH 0xa0 - -/* alu/jmp fields */ -#define BPF_OP(code) ((code) & 0xf0) -#define BPF_ADD 0x00 -#define BPF_SUB 0x10 -#define BPF_MUL 0x20 -#define BPF_DIV 0x30 -#define BPF_OR 0x40 -#define BPF_AND 0x50 -#define BPF_LSH 0x60 -#define BPF_RSH 0x70 -#define BPF_NEG 0x80 -#define BPF_MOD 0x90 -#define BPF_XOR 0xa0 - -#define BPF_JA 0x00 -#define BPF_JEQ 0x10 -#define BPF_JGT 0x20 -#define BPF_JGE 0x30 -#define BPF_JSET 0x40 -#define BPF_SRC(code) ((code) & 0x08) -#define BPF_K 0x00 -#define BPF_X 0x08 - /* ret - BPF_K and BPF_X also apply */ #define BPF_RVAL(code) ((code) & 0x18) #define BPF_A 0x10 @@ -91,10 +41,6 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define BPF_TAX 0x00 #define BPF_TXA 0x80 -#ifndef BPF_MAXINSNS -#define BPF_MAXINSNS 4096 -#endif - /* * Macros for filter block array initializers. */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 4133e744e4e..74e7c60c471 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -39,7 +39,6 @@ #define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) #define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) #define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) -#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) #define RAID_AUTORUN _IO (MD_MAJOR, 0x14) #define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) diff --git a/kernel/futex.c b/kernel/futex.c index 815d7af2ffe..f3a3a071283 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -343,6 +343,8 @@ static void get_futex_key_refs(union futex_key *key) case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies MB (B) */ break; + default: + smp_mb(); /* explicit MB (B) */ } } diff --git a/kernel/module.c b/kernel/module.c index 65586ffa0c9..88cec1ddb1e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1842,7 +1842,9 @@ static void free_module(struct module *mod) /* We leave it in list to prevent duplicate loads, but make sure * that noone uses it while it's being deconstructed. */ + mutex_lock(&module_mutex); mod->state = MODULE_STATE_UNFORMED; + mutex_unlock(&module_mutex); /* Remove dynamic debug info */ ddebug_remove_module(mod->name); diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index f0b1aa3586d..7d0e5cd7b57 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -121,9 +121,9 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) raid6_2data_recov = best->data2; raid6_datap_recov = best->datap; - printk("raid6: using %s recovery algorithm\n", best->name); + pr_info("raid6: using %s recovery algorithm\n", best->name); } else - printk("raid6: Yikes! No recovery algorithm found!\n"); + pr_err("raid6: Yikes! No recovery algorithm found!\n"); return best; } @@ -157,18 +157,18 @@ static inline const struct raid6_calls *raid6_choose_gen( bestperf = perf; best = *algo; } - printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, + pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); } } if (best) { - printk("raid6: using algorithm %s (%ld MB/s)\n", + pr_info("raid6: using algorithm %s (%ld MB/s)\n", best->name, (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); raid6_call = *best; } else - printk("raid6: Yikes! No algorithm found!\n"); + pr_err("raid6: Yikes! No algorithm found!\n"); return best; } @@ -194,7 +194,7 @@ int __init raid6_select_algo(void) syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); if (!syndromes) { - printk("raid6: Yikes! No memory available.\n"); + pr_err("raid6: Yikes! No memory available.\n"); return -ENOMEM; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 12a992b6257..0ae0df55000 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -40,7 +40,7 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { spin_lock(&wb1->list_lock); @@ -376,13 +376,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); flush_delayed_work(&bdi->wb.dwork); WARN_ON(!list_empty(&bdi->work_list)); - - /* - * This shouldn't be necessary unless @bdi for some reason has - * unflushed dirty IO after work_list is drained. Do it anyway - * just in case. - */ - cancel_delayed_work_sync(&bdi->wb.dwork); + WARN_ON(delayed_work_pending(&bdi->wb.dwork)); } /* @@ -402,21 +396,15 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { - struct device *dev = bdi->dev; - - if (dev) { + if (bdi->dev) { bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); - - spin_lock_bh(&bdi->wb_lock); + device_unregister(bdi->dev); bdi->dev = NULL; - spin_unlock_bh(&bdi->wb_lock); - - device_unregister(dev); } } EXPORT_SYMBOL(bdi_unregister); @@ -487,8 +475,17 @@ void bdi_destroy(struct backing_dev_info *bdi) int i; /* - * Splice our entries to the default_backing_dev_info, if this - * bdi disappears + * Splice our entries to the default_backing_dev_info. This + * condition shouldn't happen. @wb must be empty at this point and + * dirty inodes on it might cause other issues. This workaround is + * added by ce5f8e779519 ("writeback: splice dirty inode entries to + * default bdi on bdi_destroy()") without root-causing the issue. + * + * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com + * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350 + * + * We should probably add WARN_ON() to find out whether it still + * happens and track it down if so. */ if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; @@ -503,12 +500,7 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); - /* - * If bdi_unregister() had already been called earlier, the dwork - * could still be pending because bdi_prune_sb() can race with the - * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). - */ - cancel_delayed_work_sync(&bdi->wb.dwork); + WARN_ON(delayed_work_pending(&bdi->wb.dwork)); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 6940d8fe897..daa749c8b3f 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -575,6 +575,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) /* Ceiling limit to avoid denial of service attacks */ chan->p9_max_pages = nr_free_buffer_pages()/4; + virtio_device_ready(vdev); + mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ba02db02290..5cd44f001f6 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -87,13 +87,12 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } + caif_assert(offsetof(struct cfusbl, layer) == 0); - memset(this, 0, sizeof(struct cflayer)); + memset(&this->layer, 0, sizeof(this->layer)); this->layer.receive = cfusbl_receive; this->layer.transmit = cfusbl_transmit; this->layer.ctrlcmd = cfusbl_ctrlcmd; diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 8c5d6386319..510aa5a753f 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -47,10 +47,10 @@ static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); struct cflayer *cfmuxl_create(void) { - struct cfmuxl *this = kmalloc(sizeof(struct cfmuxl), GFP_ATOMIC); + struct cfmuxl *this = kzalloc(sizeof(struct cfmuxl), GFP_ATOMIC); + if (!this) return NULL; - memset(this, 0, sizeof(*this)); this->layer.receive = cfmuxl_receive; this->layer.transmit = cfmuxl_transmit; this->layer.ctrlcmd = cfmuxl_ctrlcmd; diff --git a/net/core/dev.c b/net/core/dev.c index 6470716ddba..b793e3521a3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2675,7 +2675,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (skb->encapsulation) features &= dev->hw_enc_features; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); diff --git a/net/core/sock.c b/net/core/sock.c index b4f3ea2fce6..15e0c67b106 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1718,6 +1718,8 @@ EXPORT_SYMBOL(sock_kmalloc); */ void sock_kfree_s(struct sock *sk, void *mem, int size) { + if (WARN_ON_ONCE(!mem)) + return; kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ad2acfe1ca6..6bcaa33cd80 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -757,7 +757,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, - dh->dccph_sport, dh->dccph_dport); + dh->dccph_sport, dh->dccph_dport, + inet6_iif(skb)); /* * Step 2: * If no socket ... diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8030489d9cb..a851e9f1411 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/etherdevice.h> #include <linux/phy.h> +#include <linux/phy_fixed.h> #include <linux/of_net.h> #include <linux/of_mdio.h> #include "dsa_priv.h" diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5b6efb3d230..f99f41bd15b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -537,7 +537,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) return 1; attrlen = rtnh_attrlen(rtnh); - if (attrlen < 0) { + if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index efa70ad4490..32e78924e24 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -87,6 +87,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (!pskb_may_pull(skb, len)) goto drop; + uh = udp_hdr(skb); + guehdr = (struct guehdr *)&uh[1]; + if (guehdr->version != 0) goto drop; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e35b7128915..88e5ef2c7f5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1535,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; struct sock *sk; struct inet_sock *inet; + int err; if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) return; @@ -1574,8 +1575,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, sock_net_set(sk, net); __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; - ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, &rt, MSG_DONTWAIT); + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { + ip_flush_pending_frames(sk); + goto out; + } + nskb = skb_peek(&sk->sk_write_queue); if (nskb) { if (arg->csumoffset >= 0) @@ -1587,7 +1593,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } - +out: put_cpu_var(unicast_sock); ip_rt_put(rt); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index f4c987bb7e9..88c386cf7d8 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) skb_pull_rcsum(skb, hdr_len); if (inner_proto == htons(ETH_P_TEB)) { - struct ethhdr *eh = (struct ethhdr *)skb->data; + struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; + eh = (struct ethhdr *)skb->data; if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) skb->protocol = eh->h_proto; else diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index af660030e3c..32b98d0207b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -255,9 +255,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_check_timestamp); -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, - struct ip_options *opt) +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct tcp_options_received tcp_opt; struct inet_request_sock *ireq; struct tcp_request_sock *treq; @@ -317,15 +317,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - if (opt && opt->optlen) { - int opt_size = sizeof(struct ip_options_rcu) + opt->optlen; - - ireq->opt = kmalloc(opt_size, GFP_ATOMIC); - if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) { - kfree(ireq->opt); - ireq->opt = NULL; - } - } + ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -344,7 +336,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, + opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 00a41499d52..a12b455928e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -68,6 +68,7 @@ #include <linux/module.h> #include <linux/sysctl.h> #include <linux/kernel.h> +#include <linux/prefetch.h> #include <net/dst.h> #include <net/tcp.h> #include <net/inet_common.h> @@ -3029,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) return packets_acked; } +static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, + u32 prior_snd_una) +{ + const struct skb_shared_info *shinfo; + + /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ + if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) + return; + + shinfo = skb_shinfo(skb); + if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && + between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); +} + /* Remove acknowledged frames from the retransmission queue. If our packet * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. @@ -3052,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt.v64 = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { - struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u8 sacked = scb->sacked; u32 acked_pcount; - if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - between(shinfo->tskey, prior_snd_una, tp->snd_una - 1)) - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + tcp_ack_tstamp(sk, skb, prior_snd_una); /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { @@ -3073,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, fully_acked = false; } else { + /* Speedup tcp_unlink_write_queue() and next loop */ + prefetchw(skb->next); acked_pcount = tcp_skb_pcount(skb); } - if (sacked & TCPCB_RETRANS) { + if (unlikely(sacked & TCPCB_RETRANS)) { if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; @@ -3107,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->tcp_flags & TCPHDR_SYN)) { + if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3119,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - if (skb == tp->retransmit_skb_hint) + if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; - if (skb == tp->lost_skb_hint) + if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; } @@ -3132,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_SACK_RENEGING; skb_mstamp_get(&now); - if (first_ackt.v64) { + if (likely(first_ackt.v64)) { seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); } @@ -3394,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int acked = 0; /* Number of packets newly acked */ long sack_rtt_us = -1L; + /* We very likely will need to access write queue head. */ + prefetchw(sk->sk_write_queue.next); + /* If the ack is older than previous acks * then we can probably ignore it. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 552e87e3c26..94d1a7757ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -880,26 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk, } EXPORT_SYMBOL(tcp_syn_flood_action); -/* - * Save and compile IPv4 options into the request_sock if needed. - */ -static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) -{ - const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; - struct ip_options_rcu *dopt = NULL; - - if (opt && opt->optlen) { - int opt_size = sizeof(*dopt) + opt->optlen; - - dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { - kfree(dopt); - dopt = NULL; - } - } - return dopt; -} - #ifdef CONFIG_TCP_MD5SIG /* * RFC2385 MD5 checksumming requires a mapping of @@ -1428,7 +1408,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_SYN_COOKIES if (!th->syn) - sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt); + sk = cookie_v4_check(sk, skb); #endif return sk; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index becd98ce9a1..3af21296d96 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); + int wmem; + + /* Keep one reference on sk_wmem_alloc. + * Will be released by sk_free() from here or tcp_tasklet_func() + */ + wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + + /* If this softirq is serviced by ksoftirqd, we are likely under stress. + * Wait until our queues (qdisc + devices) are drained. + * This gives : + * - less callbacks to tcp_write_xmit(), reducing stress (batches) + * - chance for incoming ACK (processed by another cpu maybe) + * to migrate this flow (skb->ooo_okay will be eventually set) + */ + if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + goto out; if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { unsigned long flags; struct tsq_tasklet *tsq; - /* Keep a ref on socket. - * This last ref will be released in tcp_tasklet_func() - */ - atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); - /* queue this socket to tasklet queue */ local_irq_save(flags); tsq = this_cpu_ptr(&tsq_tasklet); list_add(&tp->tsq_node, &tsq->head); tasklet_schedule(&tsq->tasklet); local_irq_restore(flags); - } else { - sock_wfree(skb); + return; } +out: + sk_free(sk); } /* This routine actually transmits TCP packets queued in by @@ -914,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_ca_event(sk, CA_EVENT_TX_START); /* if no packet is in qdisc/device queue, then allow XPS to select - * another queue. + * another queue. We can be called from tcp_tsq_handler() + * which holds one reference to sk_wmem_alloc. + * + * TODO: Ideally, in-flight pure ACK packets should not matter here. + * One way to get this would be to set skb->truesize = 2 on them. */ - skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; + skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f5e319a8d4e..baf2742d1ec 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -235,7 +235,6 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; atomic_set(&aca->aca_refcnt, 1); - spin_lock_init(&aca->aca_lock); return aca; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e25b633266c..2f25cb6347c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); + ireq->ir_iif = tcp_v6_iif(skb); ireq->ir_mark = inet_request_mark(sk, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cf2e45ab2fa..831495529b8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) @@ -738,7 +739,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); + ireq->ir_iif = tcp_v6_iif(skb); if (!TCP_SKB_CB(skb)->tcp_tw_isn && (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || @@ -860,7 +861,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) - fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_oif = tcp_v6_iif(skb); else fl6.flowi6_oif = oif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); @@ -918,7 +919,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), &tcp_hashinfo, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), inet6_iif(skb)); + ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) return; @@ -1000,13 +1001,14 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) /* Find possible connection requests. */ req = inet6_csk_search_req(sk, &prev, th->source, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, inet6_iif(skb)); + &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); + &ipv6_hdr(skb)->saddr, th->source, + &ipv6_hdr(skb)->daddr, ntohs(th->dest), + tcp_v6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1090,7 +1092,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (np->repflow) @@ -1174,7 +1176,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (np->repflow) @@ -1360,7 +1362,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = inet6_iif(opt_skb); + np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) @@ -1427,7 +1429,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, + tcp_v6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1514,7 +1517,7 @@ do_time_wait: sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); @@ -1553,6 +1556,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (th->doff < sizeof(struct tcphdr) / 4) return; + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 71cf1bffea0..1b06a1fcf3e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -30,7 +30,7 @@ #include <linux/skbuff.h> #include <net/net_namespace.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 743262becd6..6ae063cebf7 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -20,8 +20,8 @@ #include <linux/in.h> #include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/slab.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <linux/inet.h> diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index c3073a2ef63..80dbd0beb51 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -23,7 +23,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 0b4bcb2bf38..00fbf1419ec 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -22,7 +22,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b976d5eff2d..96b64d2f6db 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -25,7 +25,7 @@ #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index ca40e2298f5..029c8bb90f4 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -22,7 +22,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index ff2c1b142f5..94d05806a9a 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -23,7 +23,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 62db02ba36b..2b78789ea7c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -274,6 +274,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; else key->ip.frag = OVS_FRAG_TYPE_FIRST; + } else { + key->ip.frag = OVS_FRAG_TYPE_NONE; } nh_len = payload_ofs - nh_ofs; @@ -358,6 +360,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -557,10 +560,11 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (key->eth.type == htons(ETH_P_ARP) || key->eth.type == htons(ETH_P_RARP)) { struct arp_eth_header *arp; + bool arp_available = arphdr_ok(skb); arp = (struct arp_eth_header *)skb_network_header(skb); - if (arphdr_ok(skb) && + if (arp_available && arp->ar_hrd == htons(ARPHRD_ETHER) && arp->ar_pro == htons(ETH_P_IP) && arp->ar_hln == ETH_ALEN && @@ -673,9 +677,6 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, key->ovs_flow_hash = 0; key->recirc_id = 0; - /* Flags are always used as part of stats */ - key->tp.flags = 0; - return key_extract(skb, key); } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 368f2330791..939bcb32100 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -103,10 +103,19 @@ static void update_range__(struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ value_p, len, is_mask) -static u16 range_n_bytes(const struct sw_flow_key_range *range) -{ - return range->end - range->start; -} +#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + memset((u8 *)&(match)->mask->key.field, value,\ + sizeof((match)->mask->key.field)); \ + } else { \ + memset((u8 *)&(match)->key->field, value, \ + sizeof((match)->key->field)); \ + } \ + } while (0) static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs) @@ -809,13 +818,26 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val) +static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) { - u8 *m = (u8 *)&mask->key + range->start; + struct nlattr *nla; + int rem; + + /* The nlattr stream should already have been validated */ + nla_for_each_nested(nla, attr, rem) { + /* We assume that ovs_key_lens[type] == -1 means that type is a + * nested attribute + */ + if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) + nlattr_set(nla, val, false); + else + memset(nla_data(nla), val, nla_len(nla)); + } +} - mask->range = *range; - memset(m, val, range_n_bytes(range)); +static void mask_set_nlattr(struct nlattr *attr, u8 val) +{ + nlattr_set(attr, val, true); } /** @@ -836,6 +858,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; + struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; bool encap_valid = false; @@ -882,18 +905,44 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (err) return err; + if (match->mask && !mask) { + /* Create an exact match mask. We need to set to 0xff all the + * 'match->mask' fields that have been touched in 'match->key'. + * We cannot simply memset 'match->mask', because padding bytes + * and fields not specified in 'match->key' should be left to 0. + * Instead, we use a stream of netlink attributes, copied from + * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care + * of filling 'match->mask' appropriately. + */ + newmask = kmemdup(key, nla_total_size(nla_len(key)), + GFP_KERNEL); + if (!newmask) + return -ENOMEM; + + mask_set_nlattr(newmask, 0xff); + + /* The userspace does not send tunnel attributes that are 0, + * but we should not wildcard them nonetheless. + */ + if (match->key->tun_key.ipv4_dst) + SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); + + mask = newmask; + } + if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) - return err; + goto free_newmask; - if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { + if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { __be16 eth_type = 0; __be16 tci = 0; if (!encap_valid) { OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); @@ -904,10 +953,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); encap = a[OVS_KEY_ATTR_ENCAP]; err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + if (err) + goto free_newmask; } else { OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", ntohs(eth_type)); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } if (a[OVS_KEY_ATTR_VLAN]) @@ -915,23 +967,22 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (!(tci & htons(VLAN_TAG_PRESENT))) { OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } } err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) - return err; - } else { - /* Populate exact match flow's key mask. */ - if (match->mask) - sw_flow_mask_set(match->mask, &match->range, 0xff); + goto free_newmask; } if (!match_validate(match, key_attrs, mask_attrs)) - return -EINVAL; + err = -EINVAL; - return 0; +free_newmask: + kfree(newmask); + return err; } /** diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 910b3ef2c0d..106a9d80b66 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -30,7 +30,7 @@ /** * struct geneve_port - Keeps track of open UDP ports - * @sock: The socket created for this port number. + * @gs: The socket created for this port number. * @name: vport name. */ struct geneve_port { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 53001b020ca..6015802ebe6 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -408,13 +408,13 @@ int ovs_vport_get_upcall_portids(const struct vport *vport, * * Returns the portid of the target socket. Must be called with rcu_read_lock. */ -u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb) +u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; u32 ids_index; u32 hash; - ids = rcu_dereference(p->upcall_portids); + ids = rcu_dereference(vport->upcall_portids); if (ids->n_ids == 1 && ids->ids[0] == 0) return 0; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4e37c1cbe8b..40084d843e9 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -564,12 +564,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, if (rs->rs_bound_addr == 0) { ret = -ENOTCONN; /* XXX not a great errno */ - goto out; + goto out_ret; } if (args->nr_local > UIO_MAXIOV) { ret = -EMSGSIZE; - goto out; + goto out_ret; } /* Check whether to allocate the iovec area */ @@ -578,7 +578,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL); if (!iovs) { ret = -ENOMEM; - goto out; + goto out_ret; } } @@ -696,6 +696,7 @@ out: if (iovs != iovstack) sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size); kfree(pages); +out_ret: if (ret) rds_rdma_free_op(op); else diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a88b8524846..f791edd64d6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1668,6 +1668,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( * ack chunk whose serial number matches that of the request. */ list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) { + if (sctp_chunk_pending(ack)) + continue; if (ack->subh.addip_hdr->serial == serial) { sctp_chunk_hold(ack); return ack; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 4de12afa13d..7e8a16c7703 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -140,18 +140,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) } else { /* Nothing to do. Next chunk in the packet, please. */ ch = (sctp_chunkhdr_t *) chunk->chunk_end; - /* Force chunk->skb->data to chunk->chunk_end. */ - skb_pull(chunk->skb, - chunk->chunk_end - chunk->skb->data); - - /* Verify that we have at least chunk headers - * worth of buffer left. - */ - if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - } + skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); + /* We are guaranteed to pull a SCTP header. */ } } @@ -187,24 +178,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < + skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { - /* RFC 2960, Section 6.10 Bundling - * - * Partial chunks MUST NOT be placed in an SCTP packet. - * If the receiver detects a partial chunk, it MUST drop - * the chunk. - * - * Since the end of the chunk is past the end of our buffer - * (which contains the whole packet, we can freely discard - * the whole packet. - */ - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - - return NULL; + /* Discard inside state machine. */ + chunk->pdiscard = 1; + chunk->chunk_end = skb_tail_pointer(chunk->skb); } else { /* We are at the end of the packet, so mark the chunk * in case we need to send a SACK. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ae0e616a7ca..ab734be8cb2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3110,50 +3110,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_NO_ERROR; } -/* Verify the ASCONF packet before we process it. */ -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp) { - sctp_addip_param_t *asconf_param; +/* Verify the ASCONF packet before we process it. */ +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp) +{ + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; union sctp_params param; - int length, plen; - - param.v = (sctp_paramhdr_t *) param_hdr; - while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { - length = ntohs(param.p->length); - *errp = param.p; + bool addr_param_seen = false; - if (param.v > chunk_end - length || - length < sizeof(sctp_paramhdr_t)) - return 0; + sctp_walk_params(param, addip, addip_hdr.params) { + size_t length = ntohs(param.p->length); + *errp = param.p; switch (param.p->type) { + case SCTP_PARAM_ERR_CAUSE: + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (length != sizeof(sctp_ipv4addr_param_t)) + return false; + addr_param_seen = true; + break; + case SCTP_PARAM_IPV6_ADDRESS: + if (length != sizeof(sctp_ipv6addr_param_t)) + return false; + addr_param_seen = true; + break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: case SCTP_PARAM_SET_PRIMARY: - asconf_param = (sctp_addip_param_t *)param.v; - plen = ntohs(asconf_param->param_hdr.length); - if (plen < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) - return 0; + /* In ASCONF chunks, these need to be first. */ + if (addr_param_needed && !addr_param_seen) + return false; + length = ntohs(param.addip->param_hdr.length); + if (length < sizeof(sctp_addip_param_t) + + sizeof(sctp_paramhdr_t)) + return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: if (length != sizeof(sctp_addip_param_t)) - return 0; - + return false; break; default: - break; + /* This is unkown to us, reject! */ + return false; } - - param.v += WORD_ROUND(length); } - if (param.v != chunk_end) - return 0; + /* Remaining sanity checks. */ + if (addr_param_needed && !addr_param_seen) + return false; + if (!addr_param_needed && addr_param_seen) + return false; + if (param.v != chunk->chunk_end) + return false; - return 1; + return true; } /* Process an incoming ASCONF chunk with the next expected serial no. and @@ -3162,16 +3175,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + bool all_param_pass = true; + union sctp_params param; sctp_addiphdr_t *hdr; union sctp_addr_param *addr_param; sctp_addip_param_t *asconf_param; struct sctp_chunk *asconf_ack; - __be16 err_code; int length = 0; int chunk_len; __u32 serial; - int all_param_pass = 1; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); hdr = (sctp_addiphdr_t *)asconf->skb->data; @@ -3199,9 +3213,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, goto done; /* Process the TLVs contained within the ASCONF chunk. */ - while (chunk_len > 0) { + sctp_walk_params(param, addip, addip_hdr.params) { + /* Skip preceeding address parameters. */ + if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || + param.p->type == SCTP_PARAM_IPV6_ADDRESS) + continue; + err_code = sctp_process_asconf_param(asoc, asconf, - asconf_param); + param.addip); /* ADDIP 4.1 A7) * If an error response is received for a TLV parameter, * all TLVs with no response before the failed TLV are @@ -3209,28 +3228,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * the failed response are considered unsuccessful unless * a specific success indication is present for the parameter. */ - if (SCTP_ERROR_NO_ERROR != err_code) - all_param_pass = 0; - + if (err_code != SCTP_ERROR_NO_ERROR) + all_param_pass = false; if (!all_param_pass) - sctp_add_asconf_response(asconf_ack, - asconf_param->crr_id, err_code, - asconf_param); + sctp_add_asconf_response(asconf_ack, param.addip->crr_id, + err_code, param.addip); /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add * an IP address sends an 'Out of Resource' in its response, it * MUST also fail any subsequent add or delete requests bundled * in the ASCONF. */ - if (SCTP_ERROR_RSRC_LOW == err_code) + if (err_code == SCTP_ERROR_RSRC_LOW) goto done; - - /* Move to the next ASCONF param. */ - length = ntohs(asconf_param->param_hdr.length); - asconf_param = (void *)asconf_param + length; - chunk_len -= length; } - done: asoc->peer.addip_serial++; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c8f60632413..3ee27b7704f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -170,6 +170,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); + /* Previously already marked? */ + if (unlikely(chunk->pdiscard)) + return 0; if (unlikely(chunk_length < required_length)) return 0; @@ -3591,9 +3594,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, struct sctp_chunk *asconf_ack = NULL; struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *hdr; - union sctp_addr_param *addr_param; __u32 serial; - int length; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3618,17 +3619,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, hdr = (sctp_addiphdr_t *)chunk->skb->data; serial = ntohl(hdr->serial); - addr_param = (union sctp_addr_param *)hdr->params; - length = ntohs(addr_param->p.length); - if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, - (void *)addr_param, commands); - /* Verify the ASCONF chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)((void *)addr_param + length), - (void *)chunk->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, chunk, true, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); @@ -3745,10 +3737,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)addip_hdr->params, - (void *)asconf_ack->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); diff --git a/net/tipc/link.c b/net/tipc/link.c index 65410e18b8a..1db162aa64a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1924,7 +1924,12 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) } omsg = buf_msg(obuf); pos += align(msg_size(omsg)); - if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + if (msg_isdata(omsg)) { + if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) + tipc_sk_mcast_rcv(obuf); + else + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == CONN_MANAGER) { tipc_sk_rcv(obuf); } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { tipc_named_rcv(obuf); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8426a2aa8dc..e66314138b3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -481,6 +481,7 @@ next_inode: list_entry(sbsec->isec_head.next, struct inode_security_struct, list); struct inode *inode = isec->inode; + list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); inode = igrab(inode); if (inode) { @@ -489,7 +490,6 @@ next_inode: iput(inode); } spin_lock(&sbsec->isec_lock); - list_del_init(&isec->list); goto next_inode; } spin_unlock(&sbsec->isec_lock); |