summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Documentation/device-mapper/switch.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt17
-rw-r--r--Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc.txt4
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt8
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-st.txt33
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt1
-rw-r--r--Documentation/devicetree/changesets.txt40
-rw-r--r--Documentation/devicetree/todo.txt11
-rw-r--r--Documentation/infiniband/user_mad.txt13
-rw-r--r--Documentation/kbuild/00-INDEX2
-rw-r--r--Documentation/kbuild/headers_install.txt (renamed from Documentation/make/headers_install.txt)0
-rw-r--r--Documentation/kbuild/makefiles.txt39
-rw-r--r--Makefile30
-rw-r--r--arch/arm/boot/dts/versatile-ab.dts4
-rw-r--r--arch/arm/boot/dts/versatile-pb.dts2
-rw-r--r--arch/arm/xen/grant-table.c5
-rw-r--r--arch/ia64/Makefile2
-rw-r--r--arch/powerpc/boot/gunzip_util.c4
-rw-r--r--arch/powerpc/include/asm/cputable.h6
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/opal.h11
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h2
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h30
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/include/asm/spinlock.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S110
-rw-r--r--arch/powerpc/kernel/head_44x.S4
-rw-r--r--arch/powerpc/kernel/iommu.c38
-rw-r--r--arch/powerpc/kernel/prom.c70
-rw-r--r--arch/powerpc/kernel/smp.c11
-rw-r--r--arch/powerpc/lib/locks.c4
-rw-r--r--arch/powerpc/mm/hash_native_64.c40
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c88
-rw-r--r--arch/powerpc/mm/numa.c13
-rw-r--r--arch/powerpc/mm/pgtable_64.c44
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/mm/tlb_nohash.c111
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/platforms/powermac/feature.c20
-rw-r--r--arch/powerpc/platforms/powermac/pci.c2
-rw-r--r--arch/powerpc/platforms/powermac/smp.c2
-rw-r--r--arch/powerpc/platforms/powermac/udbg_adb.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c23
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c4
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c20
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c20
-rw-r--r--arch/powerpc/platforms/pseries/setup.c3
-rw-r--r--arch/powerpc/xmon/xmon.c3
-rw-r--r--arch/x86/xen/grant-table.c10
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--block/bio-integrity.c12
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-core.c13
-rw-r--r--block/blk-mq.c81
-rw-r--r--block/blk-mq.h2
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/compat_ioctl.c6
-rw-r--r--block/ioctl.c5
-rw-r--r--block/partitions/aix.c4
-rw-r--r--block/partitions/amiga.c12
-rw-r--r--block/partitions/efi.c46
-rw-r--r--block/partitions/msdos.c13
-rw-r--r--block/scsi_ioctl.c20
-rw-r--r--drivers/block/drbd/Makefile1
-rw-r--r--drivers/block/drbd/drbd_actlog.c518
-rw-r--r--drivers/block/drbd/drbd_bitmap.c150
-rw-r--r--drivers/block/drbd/drbd_debugfs.c958
-rw-r--r--drivers/block/drbd/drbd_debugfs.h39
-rw-r--r--drivers/block/drbd/drbd_int.h383
-rw-r--r--drivers/block/drbd/drbd_interval.h4
-rw-r--r--drivers/block/drbd/drbd_main.c302
-rw-r--r--drivers/block/drbd/drbd_nl.c110
-rw-r--r--drivers/block/drbd/drbd_proc.c125
-rw-r--r--drivers/block/drbd/drbd_receiver.c316
-rw-r--r--drivers/block/drbd/drbd_req.c527
-rw-r--r--drivers/block/drbd/drbd_req.h1
-rw-r--r--drivers/block/drbd/drbd_state.c90
-rw-r--r--drivers/block/drbd/drbd_worker.c348
-rw-r--r--drivers/block/virtio_blk.c104
-rw-r--r--drivers/cpufreq/pmac64-cpufreq.c3
-rw-r--r--drivers/crypto/nx/nx-842.c30
-rw-r--r--drivers/edac/cell_edac.c3
-rw-r--r--drivers/hwmon/adm1025.c3
-rw-r--r--drivers/hwmon/adm1026.c3
-rw-r--r--drivers/hwmon/ads1015.c2
-rw-r--r--drivers/hwmon/asb100.c4
-rw-r--r--drivers/hwmon/dme1737.c33
-rw-r--r--drivers/hwmon/emc6w201.c4
-rw-r--r--drivers/hwmon/hih6130.c3
-rw-r--r--drivers/hwmon/lm87.c4
-rw-r--r--drivers/hwmon/lm92.c13
-rw-r--r--drivers/hwmon/pc87360.c3
-rw-r--r--drivers/hwmon/tmp103.c7
-rw-r--r--drivers/hwmon/vt1211.c3
-rw-r--r--drivers/hwmon/w83627hf.c3
-rw-r--r--drivers/hwmon/w83791d.c3
-rw-r--r--drivers/hwmon/w83793.c3
-rw-r--r--drivers/hwspinlock/Kconfig2
-rw-r--r--drivers/hwspinlock/omap_hwspinlock.c27
-rw-r--r--drivers/infiniband/core/agent.c16
-rw-r--r--drivers/infiniband/core/cm.c5
-rw-r--r--drivers/infiniband/core/iwcm.c27
-rw-r--r--drivers/infiniband/core/mad.c283
-rw-r--r--drivers/infiniband/core/mad_priv.h3
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/core/user_mad.c188
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c93
-rw-r--r--drivers/infiniband/core/uverbs_main.c1
-rw-r--r--drivers/infiniband/hw/amso1100/c2_cq.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c37
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c14
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c8
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c88
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h26
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c6
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c227
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c83
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h295
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c36
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c133
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c9
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c47
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h8
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c128
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c48
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c3
-rw-r--r--drivers/md/bcache/alloc.c2
-rw-r--r--drivers/md/bcache/bcache.h4
-rw-r--r--drivers/md/bcache/bset.c2
-rw-r--r--drivers/md/bcache/bset.h2
-rw-r--r--drivers/md/bcache/btree.c50
-rw-r--r--drivers/md/bcache/btree.h5
-rw-r--r--drivers/md/bcache/extents.c13
-rw-r--r--drivers/md/bcache/extents.h1
-rw-r--r--drivers/md/bcache/journal.c24
-rw-r--r--drivers/md/bcache/request.c3
-rw-r--r--drivers/md/bcache/super.c57
-rw-r--r--drivers/md/bcache/util.h4
-rw-r--r--drivers/md/bcache/writeback.c14
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/md/dm-cache-metadata.c4
-rw-r--r--drivers/md/dm-cache-metadata.h8
-rw-r--r--drivers/md/dm-cache-target.c128
-rw-r--r--drivers/md/dm-crypt.c41
-rw-r--r--drivers/md/dm-io.c77
-rw-r--r--drivers/md/dm-mpath.c6
-rw-r--r--drivers/md/dm-switch.c67
-rw-r--r--drivers/md/dm-table.c86
-rw-r--r--drivers/md/dm-thin.c181
-rw-r--r--drivers/md/dm.h1
-rw-r--r--drivers/mfd/rtsx_usb.c1
-rw-r--r--drivers/mmc/card/block.c6
-rw-r--r--drivers/mmc/core/bus.c10
-rw-r--r--drivers/mmc/core/core.c3
-rw-r--r--drivers/mmc/core/mmc.c11
-rw-r--r--drivers/mmc/core/quirks.c2
-rw-r--r--drivers/mmc/core/sd_ops.c3
-rw-r--r--drivers/mmc/host/Kconfig28
-rw-r--r--drivers/mmc/host/Makefile1
-rw-r--r--drivers/mmc/host/dw_mmc.c98
-rw-r--r--drivers/mmc/host/dw_mmc.h5
-rw-r--r--drivers/mmc/host/mmci.c168
-rw-r--r--drivers/mmc/host/mmci.h20
-rw-r--r--drivers/mmc/host/moxart-mmc.c1
-rw-r--r--drivers/mmc/host/mxs-mmc.c3
-rw-r--r--drivers/mmc/host/omap_hsmmc.c283
-rw-r--r--drivers/mmc/host/s3cmci.c186
-rw-r--r--drivers/mmc/host/s3cmci.h4
-rw-r--r--drivers/mmc/host/sdhci-acpi.c4
-rw-r--r--drivers/mmc/host/sdhci-msm.c1
-rw-r--r--drivers/mmc/host/sdhci-pci.c38
-rw-r--r--drivers/mmc/host/sdhci-pci.h1
-rw-r--r--drivers/mmc/host/sdhci-pxav3.c13
-rw-r--r--drivers/mmc/host/sdhci-st.c176
-rw-r--r--drivers/mmc/host/sdhci-tegra.c2
-rw-r--r--drivers/mmc/host/sdhci.c144
-rw-r--r--drivers/mmc/host/sh_mmcif.c96
-rw-r--r--drivers/mmc/host/tmio_mmc_dma.c2
-rw-r--r--drivers/mmc/host/wmt-sdmmc.c33
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c7
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c3
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_roce.c18
-rw-r--r--drivers/net/ethernet/emulex/benet/be_roce.h3
-rw-r--r--drivers/net/ethernet/ibm/ehea/Makefile2
-rw-r--r--drivers/net/ethernet/intel/e1000e/manage.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_fcoe.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c91
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c160
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c26
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c88
-rw-r--r--drivers/net/ethernet/sun/sunvnet.c38
-rw-r--r--drivers/net/ethernet/sun/sunvnet.h4
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c1
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c1
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c1
-rw-r--r--drivers/net/irda/donauboe.c15
-rw-r--r--drivers/net/macvlan.c12
-rw-r--r--drivers/net/wireless/ath/carl9170/carl9170.h1
-rw-r--r--drivers/net/wireless/ath/carl9170/usb.c31
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c4
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/pcie.c3
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.c1
-rw-r--r--drivers/net/wireless/iwlwifi/mvm/mac80211.c3
-rw-r--r--drivers/net/xen-netback/common.h5
-rw-r--r--drivers/net/xen-netback/interface.c56
-rw-r--r--drivers/net/xen-netback/netback.c26
-rw-r--r--drivers/net/xen-netback/xenbus.c17
-rw-r--r--drivers/of/Kconfig3
-rw-r--r--drivers/of/Makefile4
-rw-r--r--drivers/of/base.c451
-rw-r--r--drivers/of/device.c4
-rw-r--r--drivers/of/dynamic.c660
-rw-r--r--drivers/of/fdt.c22
-rw-r--r--drivers/of/of_private.h59
-rw-r--r--drivers/of/of_reserved_mem.c70
-rw-r--r--drivers/of/platform.c32
-rw-r--r--drivers/of/selftest.c235
-rw-r--r--drivers/of/testcase-data/testcases.dts15
-rw-r--r--drivers/of/testcase-data/testcases.dtsi4
-rw-r--r--drivers/pci/hotplug/rpaphp_core.c4
-rw-r--r--drivers/scsi/cxgbi/cxgb3i/Kconfig2
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/Kconfig2
-rw-r--r--drivers/scsi/scsi_transport_srp.c3
-rw-r--r--drivers/tty/ehv_bytechan.c43
-rw-r--r--drivers/tty/hvc/hvc_opal.c15
-rw-r--r--drivers/tty/hvc/hvc_vio.c29
-rw-r--r--drivers/tty/serial/pmac_zilog.c9
-rw-r--r--drivers/tty/serial/serial_core.c3
-rw-r--r--drivers/vfio/Kconfig6
-rw-r--r--drivers/vfio/Makefile2
-rw-r--r--drivers/vfio/pci/vfio_pci.c161
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h3
-rw-r--r--drivers/vfio/vfio_spapr_eeh.c17
-rw-r--r--include/linux/bio.h1
-rw-r--r--include/linux/blkdev.h4
-rw-r--r--include/linux/drbd.h4
-rw-r--r--include/linux/drbd_genl.h4
-rw-r--r--include/linux/drbd_limits.h6
-rw-r--r--include/linux/mlx4/cmd.h7
-rw-r--r--include/linux/mlx4/device.h17
-rw-r--r--include/linux/mmc/dw_mmc.h2
-rw-r--r--include/linux/mmc/sdhci.h3
-rw-r--r--include/linux/of.h87
-rw-r--r--include/linux/of_platform.h7
-rw-r--r--include/linux/of_reserved_mem.h7
-rw-r--r--include/linux/platform_data/mmc-omap.h1
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/rhashtable.h17
-rw-r--r--include/linux/vfio.h6
-rw-r--r--include/net/inet_connection_sock.h1
-rw-r--r--include/net/sock.h1
-rw-r--r--include/net/tcp.h7
-rw-r--r--include/rdma/ib_mad.h18
-rw-r--r--include/rdma/ib_verbs.h10
-rw-r--r--include/scsi/sg.h4
-rw-r--r--include/trace/events/bcache.h21
-rw-r--r--include/trace/events/thp.h88
-rw-r--r--include/uapi/linux/bsg.h11
-rw-r--r--include/uapi/linux/virtio_blk.h5
-rw-r--r--include/uapi/rdma/ib_user_mad.h42
-rw-r--r--include/uapi/rdma/ib_user_verbs.h16
-rw-r--r--include/uapi/rdma/rdma_user_cm.h1
-rw-r--r--init/Kconfig2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/printk/printk.c12
-rw-r--r--kernel/seccomp.c10
-rw-r--r--kernel/time/timekeeping.c5
-rw-r--r--lib/Kconfig.debug24
-rw-r--r--lib/lru_cache.c23
-rw-r--r--lib/rhashtable.c10
-rw-r--r--mm/hugetlb_cgroup.c1
-rw-r--r--net/atm/lec.c5
-rw-r--r--net/atm/svc.c60
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_input.c27
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_metrics.c6
-rw-r--r--net/ipv4/tcp_output.c24
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/openvswitch/vport.c4
-rw-r--r--scripts/Kbuild.include14
-rw-r--r--scripts/Makefile.clean4
-rw-r--r--scripts/Makefile.extrawarn21
-rw-r--r--scripts/Makefile.host61
-rw-r--r--scripts/coccinelle/api/alloc/alloc_cast.cocci72
-rw-r--r--scripts/coccinelle/misc/array_size.cocci87
-rw-r--r--scripts/coccinelle/misc/badty.cocci76
-rw-r--r--scripts/coccinelle/misc/bugon.cocci (renamed from scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci)47
-rw-r--r--scripts/coccinelle/null/badzero.cocci3
-rw-r--r--sound/ppc/pmac.c6
319 files changed, 9170 insertions, 3788 deletions
diff --git a/.gitignore b/.gitignore
index f4c0b091dcf..e213b27f392 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
*.gcno
modules.builtin
Module.symvers
+*.dwo
#
# Top-level generic files
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
index 2fa749387be..8897d049483 100644
--- a/Documentation/device-mapper/switch.txt
+++ b/Documentation/device-mapper/switch.txt
@@ -106,6 +106,11 @@ which paths.
The path number in the range 0 ... (<num_paths> - 1).
Expressed in hexadecimal (WITHOUT any prefix like 0x).
+R<n>,<m>
+ This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+ are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+ slots.
+
Status
======
@@ -124,3 +129,10 @@ Create a switch device with 64kB region size:
Set mappings for the first 7 entries to point to devices switch0, switch1,
switch2, switch0, switch1, switch2, switch1:
dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+is equivalent to:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+ :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+
diff --git a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
index 532b1d440ab..6cd3525d0e0 100644
--- a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
@@ -46,13 +46,14 @@ Required Properties:
- if CIU clock divider value is 0 (that is divide by 1), both tx and rx
phase shift clocks should be 0.
-Required properties for a slot:
+Required properties for a slot (Deprecated - Recommend to use one slot per host):
* gpios: specifies a list of gpios used for command, clock and data bus. The
first gpio is the command line and the second gpio is the clock line. The
rest of the gpios (depending on the bus-width property) are the data lines in
no particular order. The format of the gpio specifier depends on the gpio
controller.
+(Deprecated - Refer to Documentation/devicetree/binding/pinctrl/samsung-pinctrl.txt)
Example:
@@ -69,21 +70,13 @@ Example:
dwmmc0@12200000 {
num-slots = <1>;
- supports-highspeed;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
broken-cd;
fifo-depth = <0x80>;
card-detect-delay = <200>;
samsung,dw-mshc-ciu-div = <3>;
samsung,dw-mshc-sdr-timing = <2 3>;
samsung,dw-mshc-ddr-timing = <1 2>;
-
- slot@0 {
- reg = <0>;
- bus-width = <8>;
- gpios = <&gpc0 0 2 0 3>, <&gpc0 1 2 0 3>,
- <&gpc1 0 2 3 3>, <&gpc1 1 2 3 3>,
- <&gpc1 2 2 3 3>, <&gpc1 3 2 3 3>,
- <&gpc0 3 2 3 3>, <&gpc0 4 2 3 3>,
- <&gpc0 5 2 3 3>, <&gpc0 6 2 3 3>;
- };
+ bus-width = <8>;
};
diff --git a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
index e5bc49f764d..3b354493143 100644
--- a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
@@ -34,13 +34,11 @@ Example:
num-slots = <1>;
vmmc-supply = <&ldo12>;
fifo-depth = <0x100>;
- supports-highspeed;
pinctrl-names = "default";
pinctrl-0 = <&sd_pmx_pins &sd_cfg_func1 &sd_cfg_func2>;
- slot@0 {
- reg = <0>;
- bus-width = <4>;
- disable-wp;
- cd-gpios = <&gpio10 3 0>;
- };
+ bus-width = <4>;
+ disable-wp;
+ cd-gpios = <&gpio10 3 0>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
};
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 3c18001dfd5..431716e37a3 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -34,8 +34,8 @@ Optional properties:
- cap-power-off-card: powering off the card is safe
- cap-sdio-irq: enable SDIO IRQ signalling on this interface
- full-pwr-cycle: full power cycle of the card is supported
-- mmc-highspeed-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported
-- mmc-highspeed-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
+- mmc-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported
+- mmc-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
new file mode 100644
index 00000000000..299081f94ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
@@ -0,0 +1,32 @@
+* Renesas Multi Media Card Interface (MMCIF) Controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the MMCIF device.
+
+
+Required properties:
+
+- compatible: must contain one of the following
+ - "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
+ - "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs
+ - "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs
+ - "renesas,sh-mmcif" for the generic MMCIF
+
+- clocks: reference to the functional clock
+
+- dmas: reference to the DMA channels, one per channel name listed in the
+ dma-names property.
+- dma-names: must contain "tx" for the transmit DMA channel and "rx" for the
+ receive DMA channel.
+
+
+Example: R8A7790 (R-Car H2) MMCIF0
+
+ mmcif0: mmc@ee200000 {
+ compatible = "renesas,mmcif-r8a7790", "renesas,sh-mmcif";
+ reg = <0 0xee200000 0 0x80>;
+ interrupts = <0 169 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp3_clks R8A7790_CLK_MMCIF0>;
+ dmas = <&dmac0 0xd1>, <&dmac0 0xd2>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index 81b33b5b20f..485483a63d8 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -27,8 +27,8 @@ Example:
bus-width = <8>;
non-removable;
- vmmc = <&pm8941_l20>;
- vqmmc = <&pm8941_s3>;
+ vmmc-supply = <&pm8941_l20>;
+ vqmmc-supply = <&pm8941_s3>;
pinctrl-names = "default";
pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>;
@@ -44,8 +44,8 @@ Example:
bus-width = <4>;
cd-gpios = <&msmgpio 62 0x1>;
- vmmc = <&pm8941_l21>;
- vqmmc = <&pm8941_l13>;
+ vmmc-supply = <&pm8941_l21>;
+ vqmmc-supply = <&pm8941_l13>;
pinctrl-names = "default";
pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>;
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
new file mode 100644
index 00000000000..7527db447a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -0,0 +1,33 @@
+* STMicroelectronics sdhci-st MMC/SD controller
+
+This file documents the differences between the core properties in
+Documentation/devicetree/bindings/mmc/mmc.txt and the properties
+used by the sdhci-st driver.
+
+Required properties:
+- compatible : Must be "st,sdhci"
+- clock-names : Should be "mmc"
+ See: Documentation/devicetree/bindings/resource-names.txt
+- clocks : Phandle of the clock used by the sdhci controler
+ See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Optional properties:
+- non-removable: non-removable slot
+ See: Documentation/devicetree/bindings/mmc/mmc.txt
+- bus-width: Number of data lines
+ See: Documentation/devicetree/bindings/mmc/mmc.txt
+
+Example:
+
+mmc0: sdhci@fe81e000 {
+ compatible = "st,sdhci";
+ status = "disabled";
+ reg = <0xfe81e000 0x1000>;
+ interrupts = <GIC_SPI 127 IRQ_TYPE_NONE>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mmc0>;
+ clock-names = "mmc";
+ clocks = <&clk_s_a1_ls 1>;
+ bus-width = <8>
+};
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 2d4a7258a10..346c6095a61 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -67,7 +67,8 @@ Optional properties:
* card-detect-delay: Delay in milli-seconds before detecting card after card
insert event. The default value is 0.
-* supports-highspeed: Enables support for high speed cards (up to 50MHz)
+* supports-highspeed (DEPRECATED): Enables support for high speed cards (up to 50MHz)
+ (use "cap-mmc-highspeed" or "cap-sd-highspeed" instead)
* broken-cd: as documented in mmc core bindings.
@@ -98,14 +99,11 @@ board specific portions as listed below.
clock-frequency = <400000000>;
clock-freq-min-max = <400000 200000000>;
num-slots = <1>;
- supports-highspeed;
broken-cd;
fifo-depth = <0x80>;
card-detect-delay = <200>;
vmmc-supply = <&buck8>;
-
- slot@0 {
- reg = <0>;
- bus-width = <8>;
- };
+ bus-width = <8>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
};
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
index ce8056116fb..76bf087bc88 100644
--- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -12,6 +12,7 @@ Required properties:
Should be "ti,omap3-hsmmc", for OMAP3 controllers
Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0
Should be "ti,omap4-hsmmc", for OMAP4 controllers
+ Should be "ti,am33xx-hsmmc", for AM335x controllers
- ti,hwmods: Must be "mmc<n>", n is controller instance starting 1
Optional properties:
@@ -56,3 +57,56 @@ Examples:
&edma 25>;
dma-names = "tx", "rx";
};
+
+[workaround for missing swakeup on am33xx]
+
+This SOC is missing the swakeup line, it will not detect SDIO irq
+while in suspend.
+
+ ------
+ | PRCM |
+ ------
+ ^ |
+ swakeup | | fclk
+ | v
+ ------ ------- -----
+ | card | -- CIRQ --> | hsmmc | -- IRQ --> | CPU |
+ ------ ------- -----
+
+In suspend the fclk is off and the module is disfunctional. Even register reads
+will fail. A small logic in the host will request fclk restore, when an
+external event is detected. Once the clock is restored, the host detects the
+event normally. Since am33xx doesn't have this line it never wakes from
+suspend.
+
+The workaround is to reconfigure the dat1 line as a GPIO upon suspend. To make
+this work, we need to set the named pinctrl states "default" and "idle".
+Prepare idle to remux dat1 as a gpio, and default to remux it back as sdio
+dat1. The MMC driver will then toggle between idle and default state during
+runtime.
+
+In summary:
+1. select matching 'compatible' section, see example below.
+2. specify pinctrl states "default" and "idle", "sleep" is optional.
+3. specify the gpio irq used for detecting sdio irq in suspend
+
+If configuration is incomplete, a warning message is emitted "falling back to
+polling". Also check the "sdio irq mode" in /sys/kernel/debug/mmc0/regs. Mind
+not every application needs SDIO irq, e.g. MMC cards.
+
+ mmc1: mmc@48060100 {
+ compatible = "ti,am33xx-hsmmc";
+ ...
+ pinctrl-names = "default", "idle", "sleep"
+ pinctrl-0 = <&mmc1_pins>;
+ pinctrl-1 = <&mmc1_idle>;
+ pinctrl-2 = <&mmc1_sleep>;
+ ...
+ interrupts-extended = <&intc 64 &gpio2 28 0>;
+ };
+
+ mmc1_idle : pinmux_cirq_pin {
+ pinctrl-single,pins = <
+ 0x0f8 0x3f /* GPIO2_28 */
+ >;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 6a2a1160a70..fa0f327cde0 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -18,6 +18,7 @@ Required properties:
"renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
"renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
"renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
+ "renesas,sdhi-r8a7791" - SDHI IP on R8A7791 SoC
Optional properties:
- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt
new file mode 100644
index 00000000000..935ba5acc34
--- /dev/null
+++ b/Documentation/devicetree/changesets.txt
@@ -0,0 +1,40 @@
+A DT changeset is a method which allows one to apply changes
+in the live tree in such a way that either the full set of changes
+will be applied, or none of them will be. If an error occurs partway
+through applying the changeset, then the tree will be rolled back to the
+previous state. A changeset can also be removed after it has been
+applied.
+
+When a changeset is applied, all of the changes get applied to the tree
+at once before emitting OF_RECONFIG notifiers. This is so that the
+receiver sees a complete and consistent state of the tree when it
+receives the notifier.
+
+The sequence of a changeset is as follows.
+
+1. of_changeset_init() - initializes a changeset
+
+2. A number of DT tree change calls, of_changeset_attach_node(),
+of_changeset_detach_node(), of_changeset_add_property(),
+of_changeset_remove_property, of_changeset_update_property() to prepare
+a set of changes. No changes to the active tree are made at this point.
+All the change operations are recorded in the of_changeset 'entries'
+list.
+
+3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex
+ensures there can only be one editor at a time.
+
+4. of_changeset_apply() - Apply the changes to the tree. Either the
+entire changeset will get applied, or if there is an error the tree will
+be restored to the previous state
+
+5. mutex_unlock(of_mutex) - All operations complete, release the mutex
+
+If a successfully applied changeset needs to be removed, it can be done
+with the following sequence.
+
+1. mutex_lock(of_mutex)
+
+2. of_changeset_revert()
+
+3. mutex_unlock(of_mutex)
diff --git a/Documentation/devicetree/todo.txt b/Documentation/devicetree/todo.txt
new file mode 100644
index 00000000000..c3cf0659bd1
--- /dev/null
+++ b/Documentation/devicetree/todo.txt
@@ -0,0 +1,11 @@
+Todo list for devicetree:
+
+=== General structure ===
+- Switch from custom lists to (h)list_head for nodes and properties structure
+- Remove of_allnodes list and iterate using list of child nodes alone
+
+=== CONFIG_OF_DYNAMIC ===
+- Switch to RCU for tree updates and get rid of global spinlock
+- Document node lifecycle for CONFIG_OF_DYNAMIC
+- Always set ->full_name at of_attach_node() time
+- pseries: Get rid of open-coded tree modification from arch/powerpc/platforms/pseries/dlpar.c
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 8a366959f5c..7aca13a54a3 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -26,6 +26,11 @@ Creating MAD agents
ioctl. Also, all agents registered through a file descriptor will
be unregistered when the descriptor is closed.
+ 2014 -- a new registration ioctl is now provided which allows additional
+ fields to be provided during registration.
+ Users of this registration call are implicitly setting the use of
+ pkey_index (see below).
+
Receiving MADs
MADs are received using read(). The receive side now supports
@@ -104,10 +109,10 @@ P_Key Index Handling
The old ib_umad interface did not allow setting the P_Key index for
MADs that are sent and did not provide a way for obtaining the P_Key
index of received MADs. A new layout for struct ib_user_mad_hdr
- with a pkey_index member has been defined; however, to preserve
- binary compatibility with older applications, this new layout will
- not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
- before a file descriptor is used for anything else.
+ with a pkey_index member has been defined; however, to preserve binary
+ compatibility with older applications, this new layout will not be used
+ unless one of IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl's
+ are called before a file descriptor is used for anything else.
In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
to 6, the new layout of struct ib_user_mad_hdr will be used by
diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index e8d2b6d83a3..8c5e6aa7800 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -1,5 +1,7 @@
00-INDEX
- this file: info on the kernel build process
+headers_install.txt
+ - how to export Linux headers for use by userspace
kbuild.txt
- developer information on kbuild
kconfig.txt
diff --git a/Documentation/make/headers_install.txt b/Documentation/kbuild/headers_install.txt
index 951eb9f1e04..951eb9f1e04 100644
--- a/Documentation/make/headers_install.txt
+++ b/Documentation/kbuild/headers_install.txt
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index c600e2f44a6..764f5991a3f 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -23,11 +23,10 @@ This document describes the Linux kernel Makefiles.
=== 4 Host Program support
--- 4.1 Simple Host Program
--- 4.2 Composite Host Programs
- --- 4.3 Defining shared libraries
- --- 4.4 Using C++ for host programs
- --- 4.5 Controlling compiler options for host programs
- --- 4.6 When host programs are actually built
- --- 4.7 Using hostprogs-$(CONFIG_FOO)
+ --- 4.3 Using C++ for host programs
+ --- 4.4 Controlling compiler options for host programs
+ --- 4.5 When host programs are actually built
+ --- 4.6 Using hostprogs-$(CONFIG_FOO)
=== 5 Kbuild clean infrastructure
@@ -643,29 +642,7 @@ Both possibilities are described in the following.
Finally, the two .o files are linked to the executable, lxdialog.
Note: The syntax <executable>-y is not permitted for host-programs.
---- 4.3 Defining shared libraries
-
- Objects with extension .so are considered shared libraries, and
- will be compiled as position independent objects.
- Kbuild provides support for shared libraries, but the usage
- shall be restricted.
- In the following example the libkconfig.so shared library is used
- to link the executable conf.
-
- Example:
- #scripts/kconfig/Makefile
- hostprogs-y := conf
- conf-objs := conf.o libkconfig.so
- libkconfig-objs := expr.o type.o
-
- Shared libraries always require a corresponding -objs line, and
- in the example above the shared library libkconfig is composed by
- the two objects expr.o and type.o.
- expr.o and type.o will be built as position independent code and
- linked as a shared library libkconfig.so. C++ is not supported for
- shared libraries.
-
---- 4.4 Using C++ for host programs
+--- 4.3 Using C++ for host programs
kbuild offers support for host programs written in C++. This was
introduced solely to support kconfig, and is not recommended
@@ -688,7 +665,7 @@ Both possibilities are described in the following.
qconf-cxxobjs := qconf.o
qconf-objs := check.o
---- 4.5 Controlling compiler options for host programs
+--- 4.4 Controlling compiler options for host programs
When compiling host programs, it is possible to set specific flags.
The programs will always be compiled utilising $(HOSTCC) passed
@@ -716,7 +693,7 @@ Both possibilities are described in the following.
When linking qconf, it will be passed the extra option
"-L$(QTDIR)/lib".
---- 4.6 When host programs are actually built
+--- 4.5 When host programs are actually built
Kbuild will only build host-programs when they are referenced
as a prerequisite.
@@ -747,7 +724,7 @@ Both possibilities are described in the following.
This will tell kbuild to build lxdialog even if not referenced in
any rule.
---- 4.7 Using hostprogs-$(CONFIG_FOO)
+--- 4.6 Using hostprogs-$(CONFIG_FOO)
A typical pattern in a Kbuild file looks like this:
diff --git a/Makefile b/Makefile
index a897c50db51..6aace675056 100644
--- a/Makefile
+++ b/Makefile
@@ -372,6 +372,7 @@ GENKSYMS = scripts/genksyms/genksyms
INSTALLKERNEL := installkernel
DEPMOD = /sbin/depmod
PERL = perl
+PYTHON = python
CHECK = sparse
CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
@@ -422,7 +423,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
export CPP AR NM STRIP OBJCOPY OBJDUMP
-export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
+export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
@@ -687,6 +688,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
# source of a reference will be _MergedGlobals and not on of the whitelisted names.
# See modpost pattern 2
KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
else
# This warning generated too much noise in a regular build.
@@ -710,9 +712,16 @@ endif
KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
ifdef CONFIG_DEBUG_INFO
+ifdef CONFIG_DEBUG_INFO_SPLIT
+KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
+else
KBUILD_CFLAGS += -g
+endif
KBUILD_AFLAGS += -Wa,-gdwarf-2
endif
+ifdef CONFIG_DEBUG_INFO_DWARF4
+KBUILD_CFLAGS += $(call cc-option, -gdwarf-4,)
+endif
ifdef CONFIG_DEBUG_INFO_REDUCED
KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \
@@ -1055,6 +1064,13 @@ headers_check: headers_install
$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
# ---------------------------------------------------------------------------
+# Kernel selftest
+
+PHONY += kselftest
+kselftest:
+ $(Q)$(MAKE) -C tools/testing/selftests run_tests
+
+# ---------------------------------------------------------------------------
# Modules
ifdef CONFIG_MODULES
@@ -1241,9 +1257,9 @@ help:
@echo ' tags/TAGS - Generate tags file for editors'
@echo ' cscope - Generate cscope index'
@echo ' gtags - Generate GNU GLOBAL index'
- @echo ' kernelrelease - Output the release version string'
- @echo ' kernelversion - Output the version stored in Makefile'
- @echo ' image_name - Output the image name'
+ @echo ' kernelrelease - Output the release version string (use with make -s)'
+ @echo ' kernelversion - Output the version stored in Makefile (use with make -s)'
+ @echo ' image_name - Output the image name (use with make -s)'
@echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \
echo ' (default: $(INSTALL_HDR_PATH))'; \
echo ''
@@ -1257,6 +1273,11 @@ help:
@echo ' headerdep - Detect inclusion cycles in headers'
@$(MAKE) -f $(srctree)/scripts/Makefile.help checker-help
@echo ''
+ @echo 'Kernel selftest'
+ @echo ' kselftest - Build and run kernel selftest (run as root)'
+ @echo ' Build, install, and boot kernel before'
+ @echo ' running kselftest on it'
+ @echo ''
@echo 'Kernel packaging:'
@$(MAKE) $(build)=$(package-dir) help
@echo ''
@@ -1398,6 +1419,7 @@ clean: $(clean-dirs)
@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
\( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
-o -name '*.ko.*' \
+ -o -name '*.dwo' \
-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
-o -name '*.symtypes' -o -name 'modules.order' \
-o -name modules.builtin -o -name '.tmp_*.o.*' \
diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts
index 36c771a2d76..27d0d9c8adf 100644
--- a/arch/arm/boot/dts/versatile-ab.dts
+++ b/arch/arm/boot/dts/versatile-ab.dts
@@ -15,6 +15,10 @@
i2c0 = &i2c0;
};
+ chosen {
+ stdout-path = &uart0;
+ };
+
memory {
reg = <0x0 0x08000000>;
};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index d025048119d..e36c1e82fea 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -56,5 +56,3 @@
};
};
};
-
-#include <testcases.dtsi>
diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c
index 2c4041c9bac..e43791829ac 100644
--- a/arch/arm/xen/grant-table.c
+++ b/arch/arm/xen/grant-table.c
@@ -49,8 +49,3 @@ int arch_gnttab_init(unsigned long nr_shared)
{
return 0;
}
-
-int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
-{
- return 0;
-}
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f37238f45bc..5441b14994f 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -76,7 +76,7 @@ vmlinux.gz: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
unwcheck: vmlinux
- -$(Q)READELF=$(READELF) python $(srctree)/arch/ia64/scripts/unwcheck.py $<
+ -$(Q)READELF=$(READELF) $(PYTHON) $(srctree)/arch/ia64/scripts/unwcheck.py $<
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
index ef2aed0f63c..9dc52501de8 100644
--- a/arch/powerpc/boot/gunzip_util.c
+++ b/arch/powerpc/boot/gunzip_util.c
@@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
r = zlib_inflate(&state->s, Z_FULL_FLUSH);
if (r != Z_OK && r != Z_STREAM_END)
fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
- len = state->s.next_out - (unsigned char *)dst;
+ len = state->s.next_out - (Byte *)dst;
} else {
/* uncompressed image */
- len = min(state->s.avail_in, (unsigned)dstlen);
+ len = min(state->s.avail_in, (uLong)dstlen);
memcpy(dst, state->s.next_in, len);
state->s.next_in += len;
state->s.avail_in -= len;
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 642e436d459..daa5af91163 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -459,7 +459,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
- CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX)
+ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
+ CPU_FTRS_PA6T | CPU_FTR_VSX)
#endif
#else
enum {
@@ -509,7 +510,8 @@ enum {
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
- CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
+ CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 44e90516519..b125ceab149 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -57,10 +57,10 @@ struct machdep_calls {
void (*hpte_removebolted)(unsigned long ea,
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
- void (*hugepage_invalidate)(struct mm_struct *mm,
+ void (*hugepage_invalidate)(unsigned long vsid,
+ unsigned long addr,
unsigned char *hpte_slot_array,
- unsigned long addr, int psize);
-
+ int psize, int ssize);
/* special for kexec, to be called in real mode, linear mapping is
* destroyed as well */
void (*hpte_clear_all)(void);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b2f8ce1fd0d..86055e59826 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -149,6 +149,8 @@ struct opal_sg_list {
#define OPAL_DUMP_INFO2 94
#define OPAL_PCI_EEH_FREEZE_SET 97
#define OPAL_HANDLE_HMI 98
+#define OPAL_REGISTER_DUMP_REGION 101
+#define OPAL_UNREGISTER_DUMP_REGION 102
#ifndef __ASSEMBLY__
@@ -920,6 +922,8 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
uint64_t length);
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_handle_hmi(void);
+int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
+int64_t opal_unregister_dump_region(uint32_t id);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -974,6 +978,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
unsigned long vmalloc_size);
void opal_free_sg_list(struct opal_sg_list *sg);
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START 0x80
+#define OPAL_DUMP_REGION_LOG_BUF 0x80
+#define OPAL_DUMP_REGION_HOST_END 0xFF
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index eb9261024f5..7b3d54fae46 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
}
extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp);
+ pmd_t *pmdp, unsigned long old_pmd);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index b6d2d42f84b..4f4ec2ab45c 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -46,11 +46,31 @@
* in order to deal with 64K made of 4K HW pages. Thus we override the
* generic accessors and iterators here
*/
-#define __real_pte(e,p) ((real_pte_t) { \
- (e), (pte_val(e) & _PAGE_COMBO) ? \
- (pte_val(*((p) + PTRS_PER_PTE))) : 0 })
-#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
- (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+{
+ real_pte_t rpte;
+
+ rpte.pte = pte;
+ rpte.hidx = 0;
+ if (pte_val(pte) & _PAGE_COMBO) {
+ /*
+ * Make sure we order the hidx load against the _PAGE_COMBO
+ * check. The store side ordering is done in __hash_page_4K
+ */
+ smp_rmb();
+ rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+ }
+ return rpte;
+}
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+ if ((pte_val(rpte.pte) & _PAGE_COMBO))
+ return (rpte.hidx >> (index<<2)) & 0xf;
+ return (pte_val(rpte.pte) >> 12) & 0xf;
+}
+
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_sub_valid(rpte, index) \
(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c987bf794e..0c0505956a2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -213,9 +213,8 @@
#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
-#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
-#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
+#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 35aa339410b..4dbe072eecb 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
+ smp_mb();
return !arch_spin_value_unlocked(*lock);
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6144d5a6bfe..050f79a4a16 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -592,61 +592,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
- .globl hmi_exception_early
-hmi_exception_early:
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
- subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl hmi_exception_realmode
- /* Windup the stack. */
- /* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9 /* get MSR value */
- andc r9,r9,r0
- mtmsrd r9,1 /* Clear MSR_RI */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
-
- /*
- * Go to virtual mode and pull the HMI event information from
- * firmware.
- */
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_hv
-
MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
@@ -1306,6 +1251,61 @@ fwnmi_data_area:
. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+ .globl hmi_exception_early
+hmi_exception_early:
+ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ std r9,_CCR(r1) /* save CR in stackframe */
+ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
+ std r11,_NIP(r1) /* save HSRR0 in stackframe */
+ mfspr r12,SPRN_HSRR1 /* Save SRR1 */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
+ EXCEPTION_PROLOG_COMMON_3(0xe60)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl hmi_exception_realmode
+ /* Windup the stack. */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r0,MSR_RI
+ mfmsr r9 /* get MSR value */
+ andc r9,r9,r0
+ mtmsrd r9,1 /* Clear MSR_RI */
+ /* Move original HSRR0 and HSRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_HSRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_HSRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+
+ /*
+ * Go to virtual mode and pull the HMI event information from
+ * firmware.
+ */
+ .globl hmi_exception_after_realmode
+hmi_exception_after_realmode:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b hmi_exception_hv
+
#ifdef CONFIG_PPC_POWERNV
_GLOBAL(opal_mc_secondary_handler)
HMT_MEDIUM_PPR_DISCARD
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index c334f53453f..b5061abbd2e 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1210,10 +1210,12 @@ clear_utlb_entry:
/* We configure icbi to invalidate 128 bytes at a time since the
* current 32-bit kernel code isn't too happy with icache != dcache
- * block size
+ * block size. We also disable the BTAC as this can cause errors
+ * in some circumstances (see IBM Erratum 47).
*/
mfspr r3,SPRN_CCR0
oris r3,r3,0x0020
+ ori r3,r3,0x0040
mtspr SPRN_CCR0,r3
isync
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f84f799babb..a10642a0d86 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
- int ret = 0;
- if (WARN_ON(dev->iommu_group)) {
- pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
- dev_name(dev),
- iommu_group_id(dev->iommu_group));
+ /*
+ * The sysfs entries should be populated before
+ * binding IOMMU group. If sysfs entries isn't
+ * ready, we simply bail.
+ */
+ if (!device_is_registered(dev))
+ return -ENOENT;
+
+ if (dev->iommu_group) {
+ pr_debug("%s: Skipping device %s with iommu group %d\n",
+ __func__, dev_name(dev),
+ iommu_group_id(dev->iommu_group));
return -EBUSY;
}
tbl = get_iommu_table_base(dev);
if (!tbl || !tbl->it_group) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
+ pr_debug("%s: Skipping device %s with no tbl\n",
+ __func__, dev_name(dev));
return 0;
}
- pr_debug("iommu_tce: adding %s to iommu group %d\n",
- dev_name(dev), iommu_group_id(tbl->it_group));
+ pr_debug("%s: Adding %s to iommu group %d\n",
+ __func__, dev_name(dev),
+ iommu_group_id(tbl->it_group));
if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
- pr_err("iommu_tce: unsupported iommu page size.");
- pr_err("%s has not been added\n", dev_name(dev));
+ pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
+ __func__, IOMMU_PAGE_SIZE(tbl),
+ PAGE_SIZE, dev_name(dev));
return -EINVAL;
}
- ret = iommu_group_add_device(tbl->it_group, dev);
- if (ret < 0)
- pr_err("iommu_tce: %s has not been added, ret=%d\n",
- dev_name(dev), ret);
-
- return ret;
+ return iommu_group_add_device(tbl->it_group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1a3b1055f5e..4e139f8a69e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -818,76 +818,6 @@ int cpu_to_chip_id(int cpu)
}
EXPORT_SYMBOL(cpu_to_chip_id);
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Fix up the uninitialized fields in a new device node:
- * name, type and pci-specific fields
- */
-
-static int of_finish_dynamic_node(struct device_node *node)
-{
- struct device_node *parent = of_get_parent(node);
- int err = 0;
- const phandle *ibm_phandle;
-
- node->name = of_get_property(node, "name", NULL);
- node->type = of_get_property(node, "device_type", NULL);
-
- if (!node->name)
- node->name = "<NULL>";
- if (!node->type)
- node->type = "<NULL>";
-
- if (!parent) {
- err = -ENODEV;
- goto out;
- }
-
- /* We don't support that function on PowerMac, at least
- * not yet
- */
- if (machine_is(powermac))
- return -ENODEV;
-
- /* fix up new node's phandle field */
- if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL)))
- node->phandle = *ibm_phandle;
-
-out:
- of_node_put(parent);
- return err;
-}
-
-static int prom_reconfig_notifier(struct notifier_block *nb,
- unsigned long action, void *node)
-{
- int err;
-
- switch (action) {
- case OF_RECONFIG_ATTACH_NODE:
- err = of_finish_dynamic_node(node);
- if (err < 0)
- printk(KERN_ERR "finish_node returned %d\n", err);
- break;
- default:
- err = 0;
- break;
- }
- return notifier_from_errno(err);
-}
-
-static struct notifier_block prom_reconfig_nb = {
- .notifier_call = prom_reconfig_notifier,
- .priority = 10, /* This one needs to run first */
-};
-
-static int __init prom_reconfig_setup(void)
-{
- return of_reconfig_notifier_register(&prom_reconfig_nb);
-}
-__initcall(prom_reconfig_setup);
-#endif
-
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return (int)phys_id == get_hard_smp_processor_id(cpu);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1007fb802e6..a0738af4aba 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -376,6 +376,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ /*
+ * numa_node_id() works after this.
+ */
+ set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+ set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
}
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -723,12 +728,6 @@ void start_secondary(void *unused)
}
traverse_core_siblings(cpu, true);
- /*
- * numa_node_id() works after this.
- */
- set_numa_node(numa_cpu_lookup_table[cpu]);
- set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 0c9c8d7d073..170a0346f75 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
+ smp_mb();
+
while (lock->slock) {
HMT_low();
if (SHARED_PROCESSOR)
__spin_yield(lock);
}
HMT_medium();
+
+ smp_mb();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8..afc0a8295f8 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -412,18 +412,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
local_irq_restore(flags);
}
-static void native_hugepage_invalidate(struct mm_struct *mm,
+static void native_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
unsigned char *hpte_slot_array,
- unsigned long addr, int psize)
+ int psize, int ssize)
{
- int ssize = 0, i;
- int lock_tlbie;
+ int i;
struct hash_pte *hptep;
int actual_psize = MMU_PAGE_16M;
unsigned int max_hpte_count, valid;
unsigned long flags, s_addr = addr;
unsigned long hpte_v, want_v, shift;
- unsigned long hidx, vpn = 0, vsid, hash, slot;
+ unsigned long hidx, vpn = 0, hash, slot;
shift = mmu_psize_defs[psize].shift;
max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -437,15 +437,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
@@ -465,22 +456,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
+ /*
+ * We need to do tlb invalidate for all the address, tlbie
+ * instruction compares entry_VA in tlb with the VA specified
+ * here
+ */
+ tlbie(vpn, psize, actual_psize, ssize, 0);
}
- /*
- * Since this is a hugepage, we just need a single tlbie.
- * use the last vpn.
- */
- lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
- if (lock_tlbie)
- raw_spin_lock(&native_tlbie_lock);
-
- asm volatile("ptesync":::"memory");
- __tlbie(vpn, psize, actual_psize, ssize);
- asm volatile("eieio; tlbsync; ptesync":::"memory");
-
- if (lock_tlbie)
- raw_spin_unlock(&native_tlbie_lock);
-
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a..5f5e6328c21 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@
#include <linux/mm.h>
#include <asm/machdep.h>
+static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize)
+{
+ int i, max_hpte_count, valid;
+ unsigned long s_addr;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, shift, vpn, hash, slot;
+
+ s_addr = addr & HPAGE_PMD_MASK;
+ hpte_slot_array = get_hpte_slot_array(pmdp);
+ /*
+ * IF we try to do a HUGE PTE update after a withdraw is done.
+ * we will find the below NULL. This happens when we do
+ * split_huge_page_pmd
+ */
+ if (!hpte_slot_array)
+ return;
+
+ if (ppc_md.hugepage_invalidate)
+ return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize);
+ /*
+ * No bluk hpte removal support, invalidate each entry
+ */
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HPAGE_PMD_SIZE >> shift;
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_valid(hpte_slot_array, i);
+ if (!valid)
+ continue;
+ hidx = hpte_hash_index(hpte_slot_array, i);
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ ppc_md.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, 0);
+ }
+}
+
+
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
pmd_t *pmdp, unsigned long trap, int local, int ssize,
unsigned int psize)
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* atomically mark the linux large page PMD busy and dirty
*/
do {
- old_pmd = pmd_val(*pmdp);
+ pmd_t pmd = ACCESS_ONCE(*pmdp);
+
+ old_pmd = pmd_val(pmd);
/* If PMD busy, retry the access */
if (unlikely(old_pmd & _PAGE_BUSY))
return 0;
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
vpn = hpt_vpn(ea, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
hpte_slot_array = get_hpte_slot_array(pmdp);
+ if (psize == MMU_PAGE_4K) {
+ /*
+ * invalidate the old hpte entry if we have that mapped via 64K
+ * base page size. This is because demote_segment won't flush
+ * hash page table entries.
+ */
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
+ }
valid = hpte_valid(hpte_slot_array, index);
if (valid) {
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* safely update this here.
*/
valid = 0;
- new_pmd &= ~_PAGE_HPTEFLAGS;
hpte_slot_array[index] = 0;
- } else
- /* clear the busy bits and set the hash pte bits */
- new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+ }
}
if (!valid) {
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
/* insert new entry */
pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
-repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
-
- /* clear the busy bits and set the hash pte bits */
- new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+ new_pmd |= _PAGE_HASHPTE;
/* Add in WIMG bits */
rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
@@ -132,6 +187,8 @@ repeat:
* enable the memory coherence always
*/
rflags |= HPTE_R_M;
+repeat:
+ hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -172,8 +229,17 @@ repeat:
mark_hpte_slot_valid(hpte_slot_array, index, slot);
}
/*
- * No need to use ldarx/stdcx here
+ * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
+ * base page size 4k.
+ */
+ if (psize == MMU_PAGE_4K)
+ new_pmd |= _PAGE_COMBO;
+ /*
+ * The hpte valid is stored in the pgtable whose address is in the
+ * second half of the PMD. Order this against clearing of the busy bit in
+ * huge pmd.
*/
+ smp_wmb();
*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
return 0;
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d3e9a78eaed..d7737a542fd 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1049,7 +1049,7 @@ static void __init mark_reserved_regions_for_nid(int nid)
void __init do_init_bootmem(void)
{
- int nid;
+ int nid, cpu;
min_low_pfn = 0;
max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -1122,8 +1122,15 @@ void __init do_init_bootmem(void)
reset_numa_cpu_lookup_table();
register_cpu_notifier(&ppc64_numa_nb);
- cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
- (void *)(unsigned long)boot_cpuid);
+ /*
+ * We need the numa_cpu_lookup_table to be accurate for all CPUs,
+ * even before we online them, so that we can use cpu_to_{node,mem}
+ * early in boot, cf. smp_prepare_cpus().
+ */
+ for_each_possible_cpu(cpu) {
+ cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+ (void *)(unsigned long)cpu);
+ }
}
void __init paging_init(void)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3b3c4d34c7a..c8d709ab489 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -54,6 +54,9 @@
#include "mmu_decl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
/* Some sanity checking */
#if TASK_SIZE_USER64 > PGTABLE_RANGE
#error TASK_SIZE_USER64 exceeds pagetable range
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
old = pmd_val(*pmdp);
*pmdp = __pmd((old & ~clr) | set);
#endif
+ trace_hugepage_update(addr, old, clr, set);
if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(mm, addr, pmdp);
+ hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
}
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
* If we didn't had the splitting flag set, go and flush the
* HPTE entries.
*/
+ trace_hugepage_splitting(address, old);
if (!(old & _PAGE_SPLITTING)) {
/* We need to flush the hpte */
if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
+ hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
}
/*
* This ensures that generic code that rely on IRQ disabling
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
+ trace_hugepage_set_pmd(addr, pmd);
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
* neesd to be flushed.
*/
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
+ pmd_t *pmdp, unsigned long old_pmd)
{
int ssize, i;
unsigned long s_addr;
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!hpte_slot_array)
return;
- /* get the base page size */
+ /* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
psize = get_slice_psize(mm, s_addr);
+ BUG_ON(psize == MMU_PAGE_16M);
+#endif
+ if (old_pmd & _PAGE_COMBO)
+ psize = MMU_PAGE_4K;
+ else
+ psize = MMU_PAGE_64K;
+
+ if (!is_kernel_addr(s_addr)) {
+ ssize = user_segment_size(s_addr);
+ vsid = get_vsid(mm->context.id, s_addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
- s_addr, psize);
+ return ppc_md.hugepage_invalidate(vsid, s_addr,
+ hpte_slot_array,
+ psize, ssize);
/*
* No bluk hpte removal support, invalidate each entry
*/
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c99f6510a0b..d2a94b85dbc 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,8 @@
#include <asm/tlb.h>
#include <asm/bug.h>
+#include <trace/events/thp.h>
+
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
/*
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
if (ptep == NULL)
continue;
pte = pte_val(*ptep);
+ if (hugepage_shift)
+ trace_hugepage_invalidate(start, pte_val(pte));
if (!(pte & _PAGE_HASHPTE))
continue;
if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
- hpte_do_hugepage_flush(mm, start, (pmd_t *)pte);
+ hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
else
hpte_need_flush(mm, start, ptep, pte, 0);
}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 92cb18d52ea..f38ea4df6a8 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void)
/*
* Early initialization of the MMU TLB code
*/
-static void __early_init_mmu(int boot_cpu)
+static void early_init_this_mmu(void)
{
unsigned int mas4;
- /* XXX This will have to be decided at runtime, but right
- * now our boot and TLB miss code hard wires it. Ideally
- * we should find out a suitable page size and patch the
- * TLB miss code (either that or use the PACA to store
- * the value we want)
- */
- mmu_linear_psize = MMU_PAGE_1G;
-
- /* XXX This should be decided at runtime based on supported
- * page sizes in the TLB, but for now let's assume 16M is
- * always there and a good fit (which it probably is)
- *
- * Freescale booke only supports 4K pages in TLB0, so use that.
- */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
- mmu_vmemmap_psize = MMU_PAGE_4K;
- else
- mmu_vmemmap_psize = MMU_PAGE_16M;
-
- /* XXX This code only checks for TLB 0 capabilities and doesn't
- * check what page size combos are supported by the HW. It
- * also doesn't handle the case where a separate array holds
- * the IND entries from the array loaded by the PT.
- */
- if (boot_cpu) {
- /* Look for supported page sizes */
- setup_page_sizes();
-
- /* Look for HW tablewalk support */
- setup_mmu_htw();
- }
-
/* Set MAS4 based on page table setting */
mas4 = 0x4 << MAS4_WIMGED_SHIFT;
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu)
}
mtspr(SPRN_MAS4, mas4);
- /* Set the global containing the top of the linear mapping
- * for use by the TLB miss code
- */
- linear_map_top = memblock_end_of_DRAM();
-
#ifdef CONFIG_PPC_FSL_BOOK3E
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu)
/* use a quarter of the TLBCAM for bolted linear map */
num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
+ }
+#endif
- /* limit memory so we dont have linear faults */
- memblock_enforce_memory_limit(linear_map_top);
+ /* A sync won't hurt us after mucking around with
+ * the MMU configuration
+ */
+ mb();
+}
+static void __init early_init_mmu_global(void)
+{
+ /* XXX This will have to be decided at runtime, but right
+ * now our boot and TLB miss code hard wires it. Ideally
+ * we should find out a suitable page size and patch the
+ * TLB miss code (either that or use the PACA to store
+ * the value we want)
+ */
+ mmu_linear_psize = MMU_PAGE_1G;
+
+ /* XXX This should be decided at runtime based on supported
+ * page sizes in the TLB, but for now let's assume 16M is
+ * always there and a good fit (which it probably is)
+ *
+ * Freescale booke only supports 4K pages in TLB0, so use that.
+ */
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+ else
+ mmu_vmemmap_psize = MMU_PAGE_16M;
+
+ /* XXX This code only checks for TLB 0 capabilities and doesn't
+ * check what page size combos are supported by the HW. It
+ * also doesn't handle the case where a separate array holds
+ * the IND entries from the array loaded by the PT.
+ */
+ /* Look for supported page sizes */
+ setup_page_sizes();
+
+ /* Look for HW tablewalk support */
+ setup_mmu_htw();
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
if (book3e_htw_mode == PPC_HTW_NONE) {
extlb_level_exc = EX_TLB_SIZE;
patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu)
}
#endif
- /* A sync won't hurt us after mucking around with
- * the MMU configuration
+ /* Set the global containing the top of the linear mapping
+ * for use by the TLB miss code
*/
- mb();
+ linear_map_top = memblock_end_of_DRAM();
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+ /*
+ * Limit memory so we dont have linear faults.
+ * Unlike memblock_set_current_limit, which limits
+ * memory available during early boot, this permanently
+ * reduces the memory available to Linux. We need to
+ * do this because highmem is not supported on 64-bit.
+ */
+ memblock_enforce_memory_limit(linear_map_top);
+ }
+#endif
memblock_set_current_limit(linear_map_top);
}
+/* boot cpu only */
void __init early_init_mmu(void)
{
- __early_init_mmu(1);
+ early_init_mmu_global();
+ early_init_this_mmu();
+ early_mmu_set_memory_limit();
}
void early_init_mmu_secondary(void)
{
- __early_init_mmu(0);
+ early_init_this_mmu();
}
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 66d0f179650..70d4f748b54 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -223,7 +223,7 @@ e_free:
pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
" rc=%ld\n",
catalog_version_num, page_offset, hret);
- kfree(page);
+ kmem_cache_free(hv_page_cache, page);
pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
offset, page_offset, count, page_count, catalog_len,
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 1413e72bc2e..4882bfd90e2 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2805,25 +2805,20 @@ set_initial_features(void)
/* Enable GMAC for now for PCI probing. It will be disabled
* later on after PCI probe
*/
- np = of_find_node_by_name(NULL, "ethernet");
- while(np) {
+ for_each_node_by_name(np, "ethernet")
if (of_device_is_compatible(np, "K2-GMAC"))
g5_gmac_enable(np, 0, 1);
- np = of_find_node_by_name(np, "ethernet");
- }
/* Enable FW before PCI probe. Will be disabled later on
* Note: We should have a batter way to check that we are
* dealing with uninorth internal cell and not a PCI cell
* on the external PCI. The code below works though.
*/
- np = of_find_node_by_name(NULL, "firewire");
- while(np) {
+ for_each_node_by_name(np, "firewire") {
if (of_device_is_compatible(np, "pci106b,5811")) {
macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
g5_fw_enable(np, 0, 1);
}
- np = of_find_node_by_name(np, "firewire");
}
}
#else /* CONFIG_PPC64 */
@@ -2834,13 +2829,11 @@ set_initial_features(void)
/* Enable GMAC for now for PCI probing. It will be disabled
* later on after PCI probe
*/
- np = of_find_node_by_name(NULL, "ethernet");
- while(np) {
+ for_each_node_by_name(np, "ethernet") {
if (np->parent
&& of_device_is_compatible(np->parent, "uni-north")
&& of_device_is_compatible(np, "gmac"))
core99_gmac_enable(np, 0, 1);
- np = of_find_node_by_name(np, "ethernet");
}
/* Enable FW before PCI probe. Will be disabled later on
@@ -2848,8 +2841,7 @@ set_initial_features(void)
* dealing with uninorth internal cell and not a PCI cell
* on the external PCI. The code below works though.
*/
- np = of_find_node_by_name(NULL, "firewire");
- while(np) {
+ for_each_node_by_name(np, "firewire") {
if (np->parent
&& of_device_is_compatible(np->parent, "uni-north")
&& (of_device_is_compatible(np, "pci106b,18") ||
@@ -2858,18 +2850,16 @@ set_initial_features(void)
macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
core99_firewire_enable(np, 0, 1);
}
- np = of_find_node_by_name(np, "firewire");
}
/* Enable ATA-100 before PCI probe. */
np = of_find_node_by_name(NULL, "ata-6");
- while(np) {
+ for_each_node_by_name(np, "ata-6") {
if (np->parent
&& of_device_is_compatible(np->parent, "uni-north")
&& of_device_is_compatible(np, "kauai-ata")) {
core99_ata100_enable(np, 1);
}
- np = of_find_node_by_name(np, "ata-6");
}
/* Switch airport off */
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index cf7009b8c7b..7e868ccf3b0 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -698,7 +698,7 @@ static void __init fixup_nec_usb2(void)
{
struct device_node *nec;
- for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) {
+ for_each_node_by_name(nec, "usb") {
struct pci_controller *hose;
u32 data;
const u32 *prop;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 5cbd4d67d5c..af094ae03db 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -577,7 +577,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus)
int ok;
/* Look for the clock chip */
- while ((cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL) {
+ for_each_node_by_name(cc, "i2c-hwclock") {
p = of_get_parent(cc);
ok = p && of_device_is_compatible(p, "uni-n-i2c");
of_node_put(p);
diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c
index 44e0b55a2a0..366bd221ede 100644
--- a/arch/powerpc/platforms/powermac/udbg_adb.c
+++ b/arch/powerpc/platforms/powermac/udbg_adb.c
@@ -191,7 +191,7 @@ int __init udbg_adb_init(int force_btext)
* of type "adb". If not, we return a failure, but we keep the
* bext output set for now
*/
- for (np = NULL; (np = of_find_node_by_name(np, "keyboard")) != NULL;) {
+ for_each_node_by_name(np, "keyboard") {
struct device_node *parent = of_get_parent(np);
int found = (parent && strcmp(parent->type, "adb") == 0);
of_node_put(parent);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a328be44880..2e6ce1b8dc8 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -245,3 +245,5 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f0a01a46a57..b44eec3e8db 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -605,6 +605,24 @@ static int opal_sysfs_init(void)
return 0;
}
+static void __init opal_dump_region_init(void)
+{
+ void *addr;
+ uint64_t size;
+ int rc;
+
+ /* Register kernel log buffer */
+ addr = log_buf_addr_get();
+ size = log_buf_len_get();
+ rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
+ __pa(addr), size);
+ /* Don't warn if this is just an older OPAL that doesn't
+ * know about that call
+ */
+ if (rc && rc != OPAL_UNSUPPORTED)
+ pr_warn("DUMP: Failed to register kernel log buffer. "
+ "rc = %d\n", rc);
+}
static int __init opal_init(void)
{
struct device_node *np, *consoles;
@@ -654,6 +672,8 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
+ /* Setup dump region interface */
+ opal_dump_region_init();
/* Setup error log interface */
rc = opal_elog_init();
/* Setup code update interface */
@@ -694,6 +714,9 @@ void opal_shutdown(void)
else
mdelay(10);
}
+
+ /* Unregister memory dump region */
+ opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
/* Export this so that test modules can use it */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b136108ddc9..df241b11d4f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -857,7 +857,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 7995135170a..c904583baf4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -146,7 +146,7 @@ static inline int pseries_remove_memblock(unsigned long base,
}
static inline int pseries_remove_mem_node(struct device_node *np)
{
- return -EOPNOTSUPP;
+ return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -194,7 +194,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
if (!memblock_size)
return -EINVAL;
- p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+ p = (u32 *) pr->old_prop->value;
if (!p)
return -EINVAL;
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 4557e91626c..eedb64594dc 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
return retval;
}
- last_p_partition_ID = pi_buff[0];
- last_p_unit_address = pi_buff[1];
+ last_p_partition_ID = be64_to_cpu(pi_buff[0]);
+ last_p_unit_address = be64_to_cpu(pi_buff[1]);
/* This indicates that there are no further partners */
if (last_p_partition_ID == ~0UL
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 33b552ffbe5..4642d6a4d35 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -721,13 +721,13 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
-static void remove_ddw(struct device_node *np)
+static void remove_ddw(struct device_node *np, bool remove_prop)
{
struct dynamic_dma_window_prop *dwp;
struct property *win64;
const u32 *ddw_avail;
u64 liobn;
- int len, ret;
+ int len, ret = 0;
ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
@@ -761,7 +761,8 @@ static void remove_ddw(struct device_node *np)
np->full_name, ret, ddw_avail[2], liobn);
delprop:
- ret = of_remove_property(np, win64);
+ if (remove_prop)
+ ret = of_remove_property(np, win64);
if (ret)
pr_warning("%s: failed to remove direct window property: %d\n",
np->full_name, ret);
@@ -805,7 +806,7 @@ static int find_existing_ddw_windows(void)
window = kzalloc(sizeof(*window), GFP_KERNEL);
if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
kfree(window);
- remove_ddw(pdn);
+ remove_ddw(pdn, true);
continue;
}
@@ -1045,7 +1046,7 @@ out_free_window:
kfree(window);
out_clear_window:
- remove_ddw(pdn);
+ remove_ddw(pdn, true);
out_free_prop:
kfree(win64->name);
@@ -1255,7 +1256,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
switch (action) {
case OF_RECONFIG_DETACH_NODE:
- remove_ddw(np);
+ /*
+ * Removing the property will invoke the reconfig
+ * notifier again, which causes dead-lock on the
+ * read-write semaphore of the notifier chain. So
+ * we have to remove the property when releasing
+ * the device node.
+ */
+ remove_ddw(np, false);
if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fbfcef514aa..34e64237fff 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -431,16 +431,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
-static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
- unsigned char *hpte_slot_array,
- unsigned long addr, int psize)
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
+ unsigned char *hpte_slot_array,
+ int psize, int ssize)
{
- int ssize = 0, i, index = 0;
+ int i, index = 0;
unsigned long s_addr = addr;
unsigned int max_hpte_count, valid;
unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
- unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+ unsigned long shift, hidx, vpn = 0, hash, slot;
shift = mmu_psize_defs[psize].shift;
max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -453,15 +454,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index cfe8a6389a5..e724d3186e7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -232,8 +232,7 @@ static void __init pseries_discover_pic(void)
struct device_node *np;
const char *typep;
- for (np = NULL; (np = of_find_node_by_name(np,
- "interrupt-controller"));) {
+ for_each_node_by_name(np, "interrupt-controller") {
typep = of_get_property(np, "compatible", NULL);
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 8d198b5e9e0..b988b5addf8 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/bug.h>
+#include <linux/nmi.h>
#include <asm/ptrace.h>
#include <asm/string.h>
@@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
#endif
local_irq_save(flags);
+ hard_irq_disable();
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
@@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
#endif
insert_cpu_bpts();
+ touch_nmi_watchdog();
local_irq_restore(flags);
return cmd != 'X' && cmd != EOF;
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index c0413046483..1580e7a5a4c 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -118,6 +118,7 @@ static int __init xlated_setup_gnttab_pages(void)
{
struct page **pages;
xen_pfn_t *pfns;
+ void *vaddr;
int rc;
unsigned int i;
unsigned long nr_grant_frames = gnttab_max_grant_frames();
@@ -143,21 +144,20 @@ static int __init xlated_setup_gnttab_pages(void)
for (i = 0; i < nr_grant_frames; i++)
pfns[i] = page_to_pfn(pages[i]);
- rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
- &xen_auto_xlat_grant_frames.vaddr);
-
- if (rc) {
+ vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL);
+ if (!vaddr) {
pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
nr_grant_frames, rc);
free_xenballooned_pages(nr_grant_frames, pages);
kfree(pages);
kfree(pfns);
- return rc;
+ return -ENOMEM;
}
kfree(pages);
xen_auto_xlat_grant_frames.pfn = pfns;
xen_auto_xlat_grant_frames.count = nr_grant_frames;
+ xen_auto_xlat_grant_frames.vaddr = vaddr;
return 0;
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 7b78f88c170..5718b0b58b6 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -444,7 +444,7 @@ void xen_setup_timer(int cpu)
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
- IRQF_FORCE_RESUME,
+ IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
name, NULL);
(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9e241063a61..bc423f7b02d 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
bs->bvec_integrity_pool);
if (!bip->bip_vec)
goto err;
+ bip->bip_max_vcnt = bvec_nr_vecs(idx);
} else {
bip->bip_vec = bip->bip_inline_vecs;
+ bip->bip_max_vcnt = inline_vecs;
}
bip->bip_slab = idx;
@@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio)
}
EXPORT_SYMBOL(bio_integrity_free);
-static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
-{
- if (bip->bip_slab == BIO_POOL_NONE)
- return BIP_INLINE_VECS;
-
- return bvec_nr_vecs(bip->bip_slab);
-}
-
/**
* bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update
@@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
+ if (bip->bip_vcnt >= bip->bip_max_vcnt) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
diff --git a/block/bio.c b/block/bio.c
index 0ec61c9e536..3e6331d25d9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
bslab = &bio_slabs[entry];
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
- slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+ slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
+ SLAB_HWCACHE_ALIGN, NULL);
if (!slab)
goto out_unlock;
diff --git a/block/blk-core.c b/block/blk-core.c
index 6f8dba161bf..c359d72e9d7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -438,14 +438,17 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
*/
void blk_queue_bypass_start(struct request_queue *q)
{
- bool drain;
-
spin_lock_irq(q->queue_lock);
- drain = !q->bypass_depth++;
+ q->bypass_depth++;
queue_flag_set(QUEUE_FLAG_BYPASS, q);
spin_unlock_irq(q->queue_lock);
- if (drain) {
+ /*
+ * Queues start drained. Skip actual draining till init is
+ * complete. This avoids lenghty delays during queue init which
+ * can happen many times during boot.
+ */
+ if (blk_queue_init_done(q)) {
spin_lock_irq(q->queue_lock);
__blk_drain_queue(q, false);
spin_unlock_irq(q->queue_lock);
@@ -511,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q)
* prevent that q->request_fn() gets invoked after draining finished.
*/
if (q->mq_ops) {
- blk_mq_drain_queue(q);
+ blk_mq_freeze_queue(q);
spin_lock_irq(lock);
} else {
spin_lock_irq(lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ad69ef657e8..5189cb1e478 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,68 +78,47 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
static int blk_mq_queue_enter(struct request_queue *q)
{
- int ret;
-
- __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
- smp_wmb();
-
- /* we have problems freezing the queue if it's initializing */
- if (!blk_queue_dying(q) &&
- (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
- return 0;
-
- __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+ while (true) {
+ int ret;
- spin_lock_irq(q->queue_lock);
- ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
- !blk_queue_bypass(q) || blk_queue_dying(q),
- *q->queue_lock);
- /* inc usage with lock hold to avoid freeze_queue runs here */
- if (!ret && !blk_queue_dying(q))
- __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
- else if (blk_queue_dying(q))
- ret = -ENODEV;
- spin_unlock_irq(q->queue_lock);
+ if (percpu_ref_tryget_live(&q->mq_usage_counter))
+ return 0;
- return ret;
+ ret = wait_event_interruptible(q->mq_freeze_wq,
+ !q->mq_freeze_depth || blk_queue_dying(q));
+ if (blk_queue_dying(q))
+ return -ENODEV;
+ if (ret)
+ return ret;
+ }
}
static void blk_mq_queue_exit(struct request_queue *q)
{
- __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+ percpu_ref_put(&q->mq_usage_counter);
}
-void blk_mq_drain_queue(struct request_queue *q)
+static void blk_mq_usage_counter_release(struct percpu_ref *ref)
{
- while (true) {
- s64 count;
-
- spin_lock_irq(q->queue_lock);
- count = percpu_counter_sum(&q->mq_usage_counter);
- spin_unlock_irq(q->queue_lock);
+ struct request_queue *q =
+ container_of(ref, struct request_queue, mq_usage_counter);
- if (count == 0)
- break;
- blk_mq_start_hw_queues(q);
- msleep(10);
- }
+ wake_up_all(&q->mq_freeze_wq);
}
/*
* Guarantee no request is in use, so we can change any data structure of
* the queue afterward.
*/
-static void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue(struct request_queue *q)
{
- bool drain;
-
spin_lock_irq(q->queue_lock);
- drain = !q->bypass_depth++;
- queue_flag_set(QUEUE_FLAG_BYPASS, q);
+ q->mq_freeze_depth++;
spin_unlock_irq(q->queue_lock);
- if (drain)
- blk_mq_drain_queue(q);
+ percpu_ref_kill(&q->mq_usage_counter);
+ blk_mq_run_queues(q, false);
+ wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
}
static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -147,14 +126,13 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
bool wake = false;
spin_lock_irq(q->queue_lock);
- if (!--q->bypass_depth) {
- queue_flag_clear(QUEUE_FLAG_BYPASS, q);
- wake = true;
- }
- WARN_ON_ONCE(q->bypass_depth < 0);
+ wake = !--q->mq_freeze_depth;
+ WARN_ON_ONCE(q->mq_freeze_depth < 0);
spin_unlock_irq(q->queue_lock);
- if (wake)
+ if (wake) {
+ percpu_ref_reinit(&q->mq_usage_counter);
wake_up_all(&q->mq_freeze_wq);
+ }
}
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
@@ -1798,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
if (!q)
goto err_hctxs;
- if (percpu_counter_init(&q->mq_usage_counter, 0))
+ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
goto err_map;
setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1891,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q)
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
blk_mq_free_hw_queues(q, set);
- percpu_counter_destroy(&q->mq_usage_counter);
+ percpu_ref_exit(&q->mq_usage_counter);
free_percpu(q->queue_ctx);
kfree(q->queue_hw_ctx);
@@ -2050,8 +2028,7 @@ static int __init blk_mq_init(void)
{
blk_mq_cpu_init();
- /* Must be called after percpu_counter_hotcpu_callback() */
- hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
+ hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
return 0;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 26460884c6c..ca4964a6295 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -28,7 +28,7 @@ struct blk_mq_ctx {
void __blk_mq_complete_request(struct request *rq);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_init_flush(struct request_queue *q);
-void blk_mq_drain_queue(struct request_queue *q);
+void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 23321fbab29..4db5abf96b9 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -554,8 +554,8 @@ int blk_register_queue(struct gendisk *disk)
* Initialization must be complete by now. Finish the initial
* bypass from queue allocation.
*/
- blk_queue_bypass_end(q);
queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+ blk_queue_bypass_end(q);
ret = blk_trace_init_sysfs(dev);
if (ret)
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index a0926a6094b..18b282ce361 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -663,6 +663,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
fmode_t mode = file->f_mode;
struct backing_dev_info *bdi;
loff_t size;
+ unsigned int max_sectors;
/*
* O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
@@ -719,8 +720,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case BLKSSZGET: /* get block device hardware sector size */
return compat_put_int(arg, bdev_logical_block_size(bdev));
case BLKSECTGET:
- return compat_put_ushort(arg,
- queue_max_sectors(bdev_get_queue(bdev)));
+ max_sectors = min_t(unsigned int, USHRT_MAX,
+ queue_max_sectors(bdev_get_queue(bdev)));
+ return compat_put_ushort(arg, max_sectors);
case BLKROTATIONAL:
return compat_put_ushort(arg,
!blk_queue_nonrot(bdev_get_queue(bdev)));
diff --git a/block/ioctl.c b/block/ioctl.c
index 7d5c3b20af4..d6cda8147c9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -278,6 +278,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
struct backing_dev_info *bdi;
loff_t size;
int ret, n;
+ unsigned int max_sectors;
switch(cmd) {
case BLKFLSBUF:
@@ -375,7 +376,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKDISCARDZEROES:
return put_uint(arg, bdev_discard_zeroes_data(bdev));
case BLKSECTGET:
- return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
+ max_sectors = min_t(unsigned int, USHRT_MAX,
+ queue_max_sectors(bdev_get_queue(bdev)));
+ return put_ushort(arg, max_sectors);
case BLKROTATIONAL:
return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
case BLKRASET:
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index 43be471d9b1..f3ed7b2d89b 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -215,7 +215,7 @@ int aix_partition(struct parsed_partitions *state)
numlvs = be16_to_cpu(p->numlvs);
put_dev_sector(sect);
}
- lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL);
+ lvip = kcalloc(state->limit, sizeof(struct lv_info), GFP_KERNEL);
if (!lvip)
return 0;
if (numlvs && (d = read_part_sector(state, vgda_sector + 1, &sect))) {
@@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state)
continue;
}
lv_ix = be16_to_cpu(p->lv_ix) - 1;
- if (lv_ix > state->limit) {
+ if (lv_ix >= state->limit) {
cur_lv_ix = -1;
continue;
}
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 70cbf44a156..2b13533d60a 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -7,6 +7,8 @@
* Re-organised Feb 1998 Russell King
*/
+#define pr_fmt(fmt) fmt
+
#include <linux/types.h>
#include <linux/affs_hardblocks.h>
@@ -40,7 +42,7 @@ int amiga_partition(struct parsed_partitions *state)
data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
- printk("Dev %s: unable to read RDB block %d\n",
+ pr_err("Dev %s: unable to read RDB block %d\n",
bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
@@ -57,12 +59,12 @@ int amiga_partition(struct parsed_partitions *state)
*(__be32 *)(data+0xdc) = 0;
if (checksum_block((__be32 *)data,
be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
- printk("Warning: Trashed word at 0xd0 in block %d "
- "ignored in checksum calculation\n",blk);
+ pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n",
+ blk);
break;
}
- printk("Dev %s: RDB in block %d has bad checksum\n",
+ pr_err("Dev %s: RDB in block %d has bad checksum\n",
bdevname(state->bdev, b), blk);
}
@@ -83,7 +85,7 @@ int amiga_partition(struct parsed_partitions *state)
data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
- printk("Dev %s: unable to read partition block %d\n",
+ pr_err("Dev %s: unable to read partition block %d\n",
bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index dc51f467a56..56d08fd75b1 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -121,7 +121,7 @@ __setup("gpt", force_gpt_fn);
/**
* efi_crc32() - EFI version of crc32 function
* @buf: buffer to calculate crc32 of
- * @len - length of buf
+ * @len: length of buf
*
* Description: Returns EFI-style CRC32 value for @buf
*
@@ -240,10 +240,10 @@ done:
/**
* read_lba(): Read bytes from disk, starting at given LBA
- * @state
- * @lba
- * @buffer
- * @size_t
+ * @state: disk parsed partitions
+ * @lba: the Logical Block Address of the partition table
+ * @buffer: destination buffer
+ * @count: bytes to read
*
* Description: Reads @count bytes from @state->bdev into @buffer.
* Returns number of bytes read on success, 0 on error.
@@ -277,8 +277,8 @@ static size_t read_lba(struct parsed_partitions *state,
/**
* alloc_read_gpt_entries(): reads partition entries from disk
- * @state
- * @gpt - GPT header
+ * @state: disk parsed partitions
+ * @gpt: GPT header
*
* Description: Returns ptes on success, NULL on error.
* Allocates space for PTEs based on information found in @gpt.
@@ -312,8 +312,8 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
/**
* alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
- * @state
- * @lba is the Logical Block Address of the partition table
+ * @state: disk parsed partitions
+ * @lba: the Logical Block Address of the partition table
*
* Description: returns GPT header on success, NULL on error. Allocates
* and fills a GPT header starting at @ from @state->bdev.
@@ -340,10 +340,10 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
/**
* is_gpt_valid() - tests one GPT header and PTEs for validity
- * @state
- * @lba is the logical block address of the GPT header to test
- * @gpt is a GPT header ptr, filled on return.
- * @ptes is a PTEs ptr, filled on return.
+ * @state: disk parsed partitions
+ * @lba: logical block address of the GPT header to test
+ * @gpt: GPT header ptr, filled on return.
+ * @ptes: PTEs ptr, filled on return.
*
* Description: returns 1 if valid, 0 on error.
* If valid, returns pointers to newly allocated GPT header and PTEs.
@@ -461,8 +461,8 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
/**
* is_pte_valid() - tests one PTE for validity
- * @pte is the pte to check
- * @lastlba is last lba of the disk
+ * @pte:pte to check
+ * @lastlba: last lba of the disk
*
* Description: returns 1 if valid, 0 on error.
*/
@@ -478,9 +478,10 @@ is_pte_valid(const gpt_entry *pte, const u64 lastlba)
/**
* compare_gpts() - Search disk for valid GPT headers and PTEs
- * @pgpt is the primary GPT header
- * @agpt is the alternate GPT header
- * @lastlba is the last LBA number
+ * @pgpt: primary GPT header
+ * @agpt: alternate GPT header
+ * @lastlba: last LBA number
+ *
* Description: Returns nothing. Sanity checks pgpt and agpt fields
* and prints warnings on discrepancies.
*
@@ -572,9 +573,10 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
/**
* find_valid_gpt() - Search disk for valid GPT headers and PTEs
- * @state
- * @gpt is a GPT header ptr, filled on return.
- * @ptes is a PTEs ptr, filled on return.
+ * @state: disk parsed partitions
+ * @gpt: GPT header ptr, filled on return.
+ * @ptes: PTEs ptr, filled on return.
+ *
* Description: Returns 1 if valid, 0 on error.
* If valid, returns pointers to newly allocated GPT header and PTEs.
* Validity depends on PMBR being valid (or being overridden by the
@@ -663,7 +665,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
/**
* efi_partition(struct parsed_partitions *state)
- * @state
+ * @state: disk parsed partitions
*
* Description: called from check.c, if the disk contains GPT
* partitions, sets up partition entries in the kernel.
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 9123f250b42..93e7c1b32ed 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -159,8 +159,9 @@ static void parse_extended(struct parsed_partitions *state,
/*
* First process the data partition(s)
*/
- for (i=0; i<4; i++, p++) {
+ for (i = 0; i < 4; i++, p++) {
sector_t offs, size, next;
+
if (!nr_sects(p) || is_extended_partition(p))
continue;
@@ -194,7 +195,7 @@ static void parse_extended(struct parsed_partitions *state,
* It should be a link to the next logical partition.
*/
p -= 4;
- for (i=0; i<4; i++, p++)
+ for (i = 0; i < 4; i++, p++)
if (nr_sects(p) && is_extended_partition(p))
break;
if (i == 4)
@@ -243,8 +244,8 @@ static void parse_solaris_x86(struct parsed_partitions *state,
return;
}
/* Ensure we can handle previous case of VTOC with 8 entries gracefully */
- max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
- for (i=0; i<max_nparts && state->next<state->limit; i++) {
+ max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
+ for (i = 0; i < max_nparts && state->next < state->limit; i++) {
struct solaris_x86_slice *s = &v->v_slice[i];
char tmp[3 + 10 + 1 + 1];
@@ -409,7 +410,7 @@ static void parse_minix(struct parsed_partitions *state,
/* The first sector of a Minix partition can have either
* a secondary MBR describing its subpartitions, or
* the normal boot sector. */
- if (msdos_magic_present (data + 510) &&
+ if (msdos_magic_present(data + 510) &&
SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */
char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
@@ -527,6 +528,7 @@ int msdos_partition(struct parsed_partitions *state)
for (slot = 1 ; slot <= 4 ; slot++, p++) {
sector_t start = start_sect(p)*sector_size;
sector_t size = nr_sects(p)*sector_size;
+
if (!size)
continue;
if (is_extended_partition(p)) {
@@ -537,6 +539,7 @@ int msdos_partition(struct parsed_partitions *state)
* sector, although it may not be enough/proper.
*/
sector_t n = 2;
+
n = min(size, max(sector_size, n));
put_partition(state, slot, start, n);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 14695c6221c..51bf5155ee7 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -82,9 +82,18 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
return err;
}
+static int max_sectors_bytes(struct request_queue *q)
+{
+ unsigned int max_sectors = queue_max_sectors(q);
+
+ max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9);
+
+ return max_sectors << 9;
+}
+
static int sg_get_reserved_size(struct request_queue *q, int __user *p)
{
- unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9);
+ int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q));
return put_user(val, p);
}
@@ -98,10 +107,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p)
if (size < 0)
return -EINVAL;
- if (size > (queue_max_sectors(q) << 9))
- size = queue_max_sectors(q) << 9;
- q->sg_reserved_size = size;
+ q->sg_reserved_size = min(size, max_sectors_bytes(q));
return 0;
}
@@ -283,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
unsigned long start_time;
ssize_t ret = 0;
int writing = 0;
+ int at_head = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -306,6 +314,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
case SG_DXFER_FROM_DEV:
break;
}
+ if (hdr->flags & SG_FLAG_Q_AT_HEAD)
+ at_head = 1;
rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
if (!rq)
@@ -362,7 +372,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
* (if he doesn't check that is his problem).
* N.B. a non-zero SCSI status is _not_ necessarily an error.
*/
- blk_execute_rq(q, bd_disk, rq, 0);
+ blk_execute_rq(q, bd_disk, rq, at_head);
hdr->duration = jiffies_to_msecs(jiffies - start_time);
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8b450338075..4464e353c1e 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drbd-y += drbd_nla.o
+drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 05a1780ffa8..d26a3fa6368 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk {
__be32 context[AL_CONTEXT_PER_TRANSACTION];
};
-struct update_odbm_work {
- struct drbd_work w;
- struct drbd_device *device;
- unsigned int enr;
-};
-
-struct update_al_work {
- struct drbd_work w;
- struct drbd_device *device;
- struct completion event;
- int err;
-};
-
-
-void *drbd_md_get_buffer(struct drbd_device *device)
+void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
{
int r;
wait_event(device->misc_wait,
- (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
+ (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
device->state.disk <= D_FAILED);
- return r ? NULL : page_address(device->md_io_page);
+ if (r)
+ return NULL;
+
+ device->md_io.current_use = intent;
+ device->md_io.start_jif = jiffies;
+ device->md_io.submit_jif = device->md_io.start_jif - 1;
+ return page_address(device->md_io.page);
}
void drbd_md_put_buffer(struct drbd_device *device)
{
- if (atomic_dec_and_test(&device->md_io_in_use))
+ if (atomic_dec_and_test(&device->md_io.in_use))
wake_up(&device->misc_wait);
}
@@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
static int _drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev,
- struct page *page, sector_t sector,
- int rw, int size)
+ sector_t sector, int rw)
{
struct bio *bio;
+ /* we do all our meta data IO in aligned 4k blocks. */
+ const int size = 4096;
int err;
device->md_io.done = 0;
@@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
rw |= REQ_FUA | REQ_FLUSH;
- rw |= REQ_SYNC;
+ rw |= REQ_SYNC | REQ_NOIDLE;
bio = bio_alloc_drbd(GFP_NOIO);
bio->bi_bdev = bdev->md_bdev;
bio->bi_iter.bi_sector = sector;
err = -EIO;
- if (bio_add_page(bio, page, size, 0) != size)
+ if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
- bio->bi_private = &device->md_io;
+ bio->bi_private = device;
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;
@@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
}
bio_get(bio); /* one bio_put() is in the completion handler */
- atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
+ atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
+ device->md_io.submit_jif = jiffies;
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
bio_endio(bio, -EIO);
else
@@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
sector_t sector, int rw)
{
int err;
- struct page *iop = device->md_io_page;
-
- D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);
+ D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
BUG_ON(!bdev->md_bdev);
@@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
- /* we do all our meta data IO in aligned 4k blocks. */
- err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
+ err = _drbd_md_sync_page_io(device, bdev, sector, rw);
if (err) {
drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
@@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
return need_transaction;
}
-static int al_write_transaction(struct drbd_device *device, bool delegate);
-
-/* When called through generic_make_request(), we must delegate
- * activity log I/O to the worker thread: a further request
- * submitted via generic_make_request() within the same task
- * would be queued on current->bio_list, and would only start
- * after this function returns (see generic_make_request()).
- *
- * However, if we *are* the worker, we must not delegate to ourselves.
- */
+static int al_write_transaction(struct drbd_device *device);
-/*
- * @delegate: delegate activity log I/O to the worker thread
- */
-void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
+void drbd_al_begin_io_commit(struct drbd_device *device)
{
bool locked = false;
- BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
/* Serialize multiple transactions.
* This uses test_and_set_bit, memory barrier is implicit.
*/
@@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
rcu_read_unlock();
if (write_al_updates)
- al_write_transaction(device, delegate);
+ al_write_transaction(device);
spin_lock_irq(&device->al_lock);
/* FIXME
if (err)
@@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
/*
* @delegate: delegate activity log I/O to the worker thread
*/
-void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
+void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
{
- BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
if (drbd_al_begin_io_prepare(device, i))
- drbd_al_begin_io_commit(device, delegate);
+ drbd_al_begin_io_commit(device);
}
int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
@@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
/* We want all necessary updates for a given request within the same transaction
* We could first check how many updates are *actually* needed,
* and use that instead of the worst-case nr_al_extents */
- if (available_update_slots < nr_al_extents)
- return -EWOULDBLOCK;
+ if (available_update_slots < nr_al_extents) {
+ /* Too many activity log extents are currently "hot".
+ *
+ * If we have accumulated pending changes already,
+ * we made progress.
+ *
+ * If we cannot get even a single pending change through,
+ * stop the fast path until we made some progress,
+ * or requests to "cold" extents could be starved. */
+ if (!al->pending_changes)
+ __set_bit(__LC_STARVING, &device->act_log->flags);
+ return -ENOBUFS;
+ }
/* Is resync active in this area? */
for (enr = first; enr <= last; enr++) {
@@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr)
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}
-static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
-{
- return rs_enr >>
- /* bit to page */
- ((PAGE_SHIFT + 3) -
- /* resync extent number to bit */
- (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
-}
-
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
const unsigned int stripes = device->ldev->md.al_stripes;
@@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}
-static int
-_al_write_transaction(struct drbd_device *device)
+int al_write_transaction(struct drbd_device *device)
{
struct al_transaction_on_disk *buffer;
struct lc_element *e;
@@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device)
return -EIO;
}
- buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
+ /* protects md_io_buffer, al_tr_cycle, ... */
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer) {
drbd_err(device, "disk failed while waiting for md_io buffer\n");
put_ldev(device);
@@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device)
return err;
}
-
-static int w_al_write_transaction(struct drbd_work *w, int unused)
-{
- struct update_al_work *aw = container_of(w, struct update_al_work, w);
- struct drbd_device *device = aw->device;
- int err;
-
- err = _al_write_transaction(device);
- aw->err = err;
- complete(&aw->event);
-
- return err != -EIO ? err : 0;
-}
-
-/* Calls from worker context (see w_restart_disk_io()) need to write the
- transaction directly. Others came through generic_make_request(),
- those need to delegate it to the worker. */
-static int al_write_transaction(struct drbd_device *device, bool delegate)
-{
- if (delegate) {
- struct update_al_work al_work;
- init_completion(&al_work.event);
- al_work.w.cb = w_al_write_transaction;
- al_work.device = device;
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &al_work.w);
- wait_for_completion(&al_work.event);
- return al_work.err;
- } else
- return _al_write_transaction(device);
-}
-
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
int rv;
@@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
return 0;
}
-static int w_update_odbm(struct drbd_work *w, int unused)
-{
- struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
- struct drbd_device *device = udw->device;
- struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
-
- if (!get_ldev(device)) {
- if (__ratelimit(&drbd_ratelimit_state))
- drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
- kfree(udw);
- return 0;
- }
-
- drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
- put_ldev(device);
-
- kfree(udw);
-
- if (drbd_bm_total_weight(device) <= device->rs_failed) {
- switch (device->state.conn) {
- case C_SYNC_SOURCE: case C_SYNC_TARGET:
- case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
- drbd_resync_finished(device);
- default:
- /* nothing to do */
- break;
- }
- }
- drbd_bcast_event(device, &sib);
-
- return 0;
-}
-
+static const char *drbd_change_sync_fname[] = {
+ [RECORD_RS_FAILED] = "drbd_rs_failed_io",
+ [SET_IN_SYNC] = "drbd_set_in_sync",
+ [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
+};
/* ATTENTION. The AL's extents are 4MB each, while the extents in the
* resync LRU-cache are 16MB each.
* The caller of this function has to hold an get_ldev() reference.
*
+ * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
+ * potentially pulling in (and recounting the corresponding bits)
+ * this resync extent into the resync extent lru cache.
+ *
+ * Returns whether all bits have been cleared for this resync extent,
+ * precisely: (rs_left <= rs_failed)
+ *
* TODO will be obsoleted once we have a caching lru of the on disk bitmap
*/
-static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
- int count, int success)
+static bool update_rs_extent(struct drbd_device *device,
+ unsigned int enr, int count,
+ enum update_sync_bits_mode mode)
{
struct lc_element *e;
- struct update_odbm_work *udw;
-
- unsigned int enr;
D_ASSERT(device, atomic_read(&device->local_cnt));
- /* I simply assume that a sector/size pair never crosses
- * a 16 MB extent border. (Currently this is true...) */
- enr = BM_SECT_TO_EXT(sector);
-
- e = lc_get(device->resync, enr);
+ /* When setting out-of-sync bits,
+ * we don't need it cached (lc_find).
+ * But if it is present in the cache,
+ * we should update the cached bit count.
+ * Otherwise, that extent should be in the resync extent lru cache
+ * already -- or we want to pull it in if necessary -- (lc_get),
+ * then update and check rs_left and rs_failed. */
+ if (mode == SET_OUT_OF_SYNC)
+ e = lc_find(device->resync, enr);
+ else
+ e = lc_get(device->resync, enr);
if (e) {
struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
if (ext->lce.lc_number == enr) {
- if (success)
+ if (mode == SET_IN_SYNC)
ext->rs_left -= count;
+ else if (mode == SET_OUT_OF_SYNC)
+ ext->rs_left += count;
else
ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) {
- drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
+ drbd_warn(device, "BAD! enr=%u rs_left=%d "
"rs_failed=%d count=%d cstate=%s\n",
- (unsigned long long)sector,
ext->lce.lc_number, ext->rs_left,
ext->rs_failed, count,
drbd_conn_str(device->state.conn));
@@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
ext->lce.lc_number, ext->rs_failed);
}
ext->rs_left = rs_left;
- ext->rs_failed = success ? 0 : count;
+ ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
/* we don't keep a persistent log of the resync lru,
* we can commit any change right away. */
lc_committed(device->resync);
}
- lc_put(device->resync, &ext->lce);
+ if (mode != SET_OUT_OF_SYNC)
+ lc_put(device->resync, &ext->lce);
/* no race, we are within the al_lock! */
- if (ext->rs_left == ext->rs_failed) {
+ if (ext->rs_left <= ext->rs_failed) {
ext->rs_failed = 0;
-
- udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
- if (udw) {
- udw->enr = ext->lce.lc_number;
- udw->w.cb = w_update_odbm;
- udw->device = device;
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &udw->w);
- } else {
- drbd_warn(device, "Could not kmalloc an udw\n");
- }
+ return true;
}
- } else {
+ } else if (mode != SET_OUT_OF_SYNC) {
+ /* be quiet if lc_find() did not find it. */
drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
device->resync_locked,
device->resync->nr_elements,
device->resync->flags);
}
+ return false;
}
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
}
}
-/* clear the bit corresponding to the piece of storage in question:
- * size byte of data starting from sector. Only clear a bits of the affected
- * one ore more _aligned_ BM_BLOCK_SIZE blocks.
- *
- * called by worker on C_SYNC_TARGET and receiver on SyncSource.
- *
- */
-void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+/* It is called lazy update, so don't do write-out too often. */
+static bool lazy_bitmap_update_due(struct drbd_device *device)
{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count = 0;
- sector_t esector, nr_sectors;
- int wake_up = 0;
- unsigned long flags;
+ return time_after(jiffies, device->rs_last_bcast + 2*HZ);
+}
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
+static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
+{
+ if (rs_done)
+ set_bit(RS_DONE, &device->flags);
+ /* and also set RS_PROGRESS below */
+ else if (!lazy_bitmap_update_due(device))
return;
- }
-
- if (!get_ldev(device))
- return; /* no disk, no metadata, no bitmap to clear bits in */
-
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- goto out;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /* we clear it (in sync).
- * round up start sector, round down end sector. we make sure we only
- * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- goto out;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
- if (sbnr > ebnr)
- goto out;
+ drbd_device_post_work(device, RS_PROGRESS);
+}
+static int update_sync_bits(struct drbd_device *device,
+ unsigned long sbnr, unsigned long ebnr,
+ enum update_sync_bits_mode mode)
+{
/*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
+ * We keep a count of set bits per resync-extent in the ->rs_left
+ * caching member, so we need to loop and work within the resync extent
+ * alignment. Typically this loop will execute exactly once.
*/
- count = drbd_bm_clear_bits(device, sbnr, ebnr);
- if (count) {
- drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
- spin_lock_irqsave(&device->al_lock, flags);
- drbd_try_clear_on_disk_bm(device, sector, count, true);
- spin_unlock_irqrestore(&device->al_lock, flags);
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
+ unsigned long flags;
+ unsigned long count = 0;
+ unsigned int cleared = 0;
+ while (sbnr <= ebnr) {
+ /* set temporary boundary bit number to last bit number within
+ * the resync extent of the current start bit number,
+ * but cap at provided end bit number */
+ unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
+ unsigned long c;
+
+ if (mode == RECORD_RS_FAILED)
+ /* Only called from drbd_rs_failed_io(), bits
+ * supposedly still set. Recount, maybe some
+ * of the bits have been successfully cleared
+ * by application IO meanwhile.
+ */
+ c = drbd_bm_count_bits(device, sbnr, tbnr);
+ else if (mode == SET_IN_SYNC)
+ c = drbd_bm_clear_bits(device, sbnr, tbnr);
+ else /* if (mode == SET_OUT_OF_SYNC) */
+ c = drbd_bm_set_bits(device, sbnr, tbnr);
+
+ if (c) {
+ spin_lock_irqsave(&device->al_lock, flags);
+ cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
+ spin_unlock_irqrestore(&device->al_lock, flags);
+ count += c;
+ }
+ sbnr = tbnr + 1;
}
-out:
- put_ldev(device);
- if (wake_up)
+ if (count) {
+ if (mode == SET_IN_SYNC) {
+ unsigned long still_to_go = drbd_bm_total_weight(device);
+ bool rs_is_done = (still_to_go <= device->rs_failed);
+ drbd_advance_rs_marks(device, still_to_go);
+ if (cleared || rs_is_done)
+ maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
+ } else if (mode == RECORD_RS_FAILED)
+ device->rs_failed += count;
wake_up(&device->al_wait);
+ }
+ return count;
}
-/*
- * this is intended to set one request worth of data out of sync.
- * affects at least 1 bit,
- * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector. Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
*
- * called by tl_clear and drbd_send_dblock (==drbd_make_request).
- * so this can be _any_ process.
*/
-int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+ enum update_sync_bits_mode mode,
+ const char *file, const unsigned int line)
{
- unsigned long sbnr, ebnr, flags;
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count = 0;
sector_t esector, nr_sectors;
- unsigned int enr, count = 0;
- struct lc_element *e;
- /* this should be an empty REQ_FLUSH */
- if (size == 0)
+ /* This would be an empty REQ_FLUSH, be silent. */
+ if ((mode == SET_OUT_OF_SYNC) && size == 0)
return 0;
- if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "sector: %llus, size: %d\n",
- (unsigned long long)sector, size);
+ if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+ drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
+ drbd_change_sync_fname[mode],
+ (unsigned long long)sector, size);
return 0;
}
if (!get_ldev(device))
- return 0; /* no disk, no metadata, no bitmap to set bits in */
+ return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
nr_sectors = drbd_get_capacity(device->this_bdev);
esector = sector + (size >> 9) - 1;
@@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
if (!expect(esector < nr_sectors))
esector = nr_sectors - 1;
- /* we set it out of sync,
- * we do not need to round anything here */
- sbnr = BM_SECT_TO_BIT(sector);
- ebnr = BM_SECT_TO_BIT(esector);
-
- /* ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors. */
- spin_lock_irqsave(&device->al_lock, flags);
- count = drbd_bm_set_bits(device, sbnr, ebnr);
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
- enr = BM_SECT_TO_EXT(sector);
- e = lc_find(device->resync, enr);
- if (e)
- lc_entry(e, struct bm_extent, lce)->rs_left += count;
- spin_unlock_irqrestore(&device->al_lock, flags);
+ if (mode == SET_IN_SYNC) {
+ /* Round up start sector, round down end sector. We make sure
+ * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ goto out;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+ } else {
+ /* We set it out of sync, or record resync failure.
+ * Should not round anything here. */
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+ }
+ count = update_sync_bits(device, sbnr, ebnr, mode);
out:
put_ldev(device);
-
return count;
}
@@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
struct lc_element *e;
struct bm_extent *bm_ext;
int i;
+ bool throttle = drbd_rs_should_slow_down(device, sector, true);
+
+ /* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
+ * not yet BME_LOCKED) extent needs to be kicked out explicitly if we
+ * need to throttle. There is at most one such half-locked extent,
+ * which is remembered in resync_wenr. */
+
+ if (throttle && device->resync_wenr != enr)
+ return -EAGAIN;
spin_lock_irq(&device->al_lock);
if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
clear_bit(BME_NO_WRITES, &bm_ext->flags);
device->resync_wenr = LC_FREE;
- if (lc_put(device->resync, &bm_ext->lce) == 0)
+ if (lc_put(device->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0;
device->resync_locked--;
+ }
wake_up(&device->al_wait);
} else {
drbd_alert(device, "LOGIC BUG\n");
@@ -1161,8 +1097,20 @@ proceed:
return 0;
try_again:
- if (bm_ext)
- device->resync_wenr = enr;
+ if (bm_ext) {
+ if (throttle) {
+ D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ device->resync_wenr = LC_FREE;
+ if (lc_put(device->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0;
+ device->resync_locked--;
+ }
+ wake_up(&device->al_wait);
+ } else
+ device->resync_wenr = enr;
+ }
spin_unlock_irq(&device->al_lock);
return -EAGAIN;
}
@@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device)
return 0;
}
-
-/**
- * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
- * @device: DRBD device.
- * @sector: The sector number.
- * @size: Size of failed IO operation, in byte.
- */
-void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
-{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count;
- sector_t esector, nr_sectors;
- int wake_up = 0;
-
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
- return;
- }
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- return;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /*
- * round up start sector, round down end sector. we make sure we only
- * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- return;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
-
- if (sbnr > ebnr)
- return;
-
- /*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
- */
- spin_lock_irq(&device->al_lock);
- count = drbd_bm_count_bits(device, sbnr, ebnr);
- if (count) {
- device->rs_failed += count;
-
- if (get_ldev(device)) {
- drbd_try_clear_on_disk_bm(device, sector, count, false);
- put_ldev(device);
- }
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
- }
- spin_unlock_irq(&device->al_lock);
- if (wake_up)
- wake_up(&device->al_wait);
-}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8fdfe..426c97aef90 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -22,6 +22,8 @@
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
@@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number)
for (i = 0; i < number; i++) {
if (!pages[i]) {
- printk(KERN_ALERT "drbd: bm_free_pages tried to free "
- "a NULL pointer; i=%lu n=%lu\n",
- i, number);
+ pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
+ i, number);
continue;
}
__free_page(pages[i]);
@@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
end = offset + len;
if (end > b->bm_words) {
- printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+ pr_alert("bm_memset end > bm_words\n");
return;
}
@@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
p_addr = bm_map_pidx(b, idx);
bm = p_addr + MLPP(offset);
if (bm+do_now > p_addr + LWPP) {
- printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+ pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
p_addr, bm, (int)do_now);
} else
memset(bm, c, do_now * sizeof(long));
@@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device)
spin_unlock_irq(&b->bm_lock);
}
-struct bm_aio_ctx {
- struct drbd_device *device;
- atomic_t in_flight;
- unsigned int done;
- unsigned flags;
-#define BM_AIO_COPY_PAGES 1
-#define BM_AIO_WRITE_HINTED 2
-#define BM_WRITE_ALL_PAGES 4
- int error;
- struct kref kref;
-};
-
-static void bm_aio_ctx_destroy(struct kref *kref)
+static void drbd_bm_aio_ctx_destroy(struct kref *kref)
{
- struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+ struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
+ unsigned long flags;
+ spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
+ list_del(&ctx->list);
+ spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
put_ldev(ctx->device);
kfree(ctx);
}
@@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref)
/* bv_page may be a copy, or may be the original */
static void bm_async_io_complete(struct bio *bio, int error)
{
- struct bm_aio_ctx *ctx = bio->bi_private;
+ struct drbd_bm_aio_ctx *ctx = bio->bi_private;
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
@@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error)
if (atomic_dec_and_test(&ctx->in_flight)) {
ctx->done = 1;
wake_up(&device->misc_wait);
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
}
}
-static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
+static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
struct bio *bio = bio_alloc_drbd(GFP_NOIO);
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
+ unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE;
sector_t on_disk_sector =
device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
-static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
+static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
- struct bm_aio_ctx *ctx;
+ struct drbd_bm_aio_ctx *ctx;
struct drbd_bitmap *b = device->bitmap;
int num_pages, i, count = 0;
unsigned long now;
@@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
* as we submit copies of pages anyways.
*/
- ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+ ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
if (!ctx)
return -ENOMEM;
- *ctx = (struct bm_aio_ctx) {
+ *ctx = (struct drbd_bm_aio_ctx) {
.device = device,
+ .start_jif = jiffies,
.in_flight = ATOMIC_INIT(1),
.done = 0,
.flags = flags,
@@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
.kref = { ATOMIC_INIT(2) },
};
- if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
+ if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
kfree(ctx);
return -ENODEV;
}
+ /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
+ drbd_adm_attach(), after device->ldev was assigned. */
- if (!ctx->flags)
+ if (0 == (ctx->flags & ~BM_AIO_READ))
WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&ctx->list, &device->pending_bitmap_io);
+ spin_unlock_irq(&device->resource->req_lock);
+
num_pages = b->bm_number_of_pages;
now = jiffies;
@@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
/* ignore completely unchanged pages */
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
- if (rw & WRITE) {
+ if (!(flags & BM_AIO_READ)) {
if ((flags & BM_AIO_WRITE_HINTED) &&
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;
- if (!(flags & BM_WRITE_ALL_PAGES) &&
+ if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
continue;
@@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
}
}
atomic_inc(&ctx->in_flight);
- bm_page_io_async(ctx, i, rw);
+ bm_page_io_async(ctx, i);
++count;
cond_resched();
}
@@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
if (!atomic_dec_and_test(&ctx->in_flight))
wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
else
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
/* summary for global bitmap IO */
if (flags == 0)
drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
- rw == WRITE ? "WRITE" : "READ",
+ (flags & BM_AIO_READ) ? "READ" : "WRITE",
count, jiffies - now);
if (ctx->error) {
@@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
err = -EIO; /* Disk timeout/force-detach during IO... */
now = jiffies;
- if (rw == WRITE) {
- drbd_md_flush(device);
- } else /* rw == READ */ {
+ if (flags & BM_AIO_READ) {
b->bm_set = bm_count_bits(b);
drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
jiffies - now);
}
now = b->bm_set;
- if (flags == 0)
+ if ((flags & ~BM_AIO_READ) == 0)
drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
return err;
}
@@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
*/
int drbd_bm_read(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, READ, 0, 0);
+ return bm_rw(device, BM_AIO_READ, 0);
}
/**
@@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, 0, 0);
+ return bm_rw(device, 0, 0);
}
/**
@@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0);
+ return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
+}
+
+/**
+ * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
+ * @device: DRBD device.
+ * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
+ */
+int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
+{
+ return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
}
/**
@@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0);
+ return bm_rw(device, BM_AIO_COPY_PAGES, 0);
}
/**
@@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
-}
-
-/**
- * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
- * @device: DRBD device.
- * @idx: bitmap page index
- *
- * We don't want to special case on logical_block_size of the backend device,
- * so we submit PAGE_SIZE aligned pieces.
- * Note that on "most" systems, PAGE_SIZE is 4k.
- *
- * In case this becomes an issue on systems with larger PAGE_SIZE,
- * we may want to change this again to write 4k aligned 4k pieces.
- */
-int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
-{
- struct bm_aio_ctx *ctx;
- int err;
-
- if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
- dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
- return 0;
- }
-
- ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
- if (!ctx)
- return -ENOMEM;
-
- *ctx = (struct bm_aio_ctx) {
- .device = device,
- .in_flight = ATOMIC_INIT(1),
- .done = 0,
- .flags = BM_AIO_COPY_PAGES,
- .error = 0,
- .kref = { ATOMIC_INIT(2) },
- };
-
- if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
- drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
- kfree(ctx);
- return -ENODEV;
- }
-
- bm_page_io_async(ctx, idx, WRITE_SYNC);
- wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
-
- if (ctx->error)
- drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
- /* that causes us to detach, so the in memory bitmap will be
- * gone in a moment as well. */
-
- device->bm_writ_cnt++;
- err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
- return err;
+ return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
}
/* NOTE
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
new file mode 100644
index 00000000000..5c20b18540b
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -0,0 +1,958 @@
+#define pr_fmt(fmt) "drbd debugfs: " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+#include "drbd_debugfs.h"
+
+
+/**********************************************************************
+ * Whenever you change the file format, remember to bump the version. *
+ **********************************************************************/
+
+static struct dentry *drbd_debugfs_root;
+static struct dentry *drbd_debugfs_version;
+static struct dentry *drbd_debugfs_resources;
+static struct dentry *drbd_debugfs_minors;
+
+static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
+{
+ if (valid)
+ seq_printf(m, "\t%d", jiffies_to_msecs(dt));
+ else
+ seq_printf(m, "\t-");
+}
+
+static void __seq_print_rq_state_bit(struct seq_file *m,
+ bool is_set, char *sep, const char *set_name, const char *unset_name)
+{
+ if (is_set && set_name) {
+ seq_putc(m, *sep);
+ seq_puts(m, set_name);
+ *sep = '|';
+ } else if (!is_set && unset_name) {
+ seq_putc(m, *sep);
+ seq_puts(m, unset_name);
+ *sep = '|';
+ }
+}
+
+static void seq_print_rq_state_bit(struct seq_file *m,
+ bool is_set, char *sep, const char *set_name)
+{
+ __seq_print_rq_state_bit(m, is_set, sep, set_name, NULL);
+}
+
+/* pretty print enum drbd_req_state_bits req->rq_state */
+static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
+{
+ unsigned int s = req->rq_state;
+ char sep = ' ';
+ seq_printf(m, "\t0x%08x", s);
+ seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
+
+ /* RQ_WRITE ignored, already reported */
+ seq_puts(m, "\tlocal:");
+ seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
+ seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
+ seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
+ if (sep == ' ')
+ seq_puts(m, " -");
+
+ /* for_each_connection ... */
+ seq_printf(m, "\tnet:");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
+ seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
+ seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
+ seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
+ seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
+ seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
+ if (sep == ' ')
+ seq_puts(m, " -");
+
+ seq_printf(m, " :");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
+ seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
+ seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
+ if (sep == ' ')
+ seq_puts(m, " -");
+ seq_printf(m, "\n");
+}
+
+static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+ /* change anything here, fixup header below! */
+ unsigned int s = req->rq_state;
+
+#define RQ_HDR_1 "epoch\tsector\tsize\trw"
+ seq_printf(m, "0x%x\t%llu\t%u\t%s",
+ req->epoch,
+ (unsigned long long)req->i.sector, req->i.size >> 9,
+ (s & RQ_WRITE) ? "W" : "R");
+
+#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
+ seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
+ seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
+ seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
+
+#define RQ_HDR_3 "\tsent\tacked\tdone"
+ seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
+ seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
+ seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
+
+#define RQ_HDR_4 "\tstate\n"
+ seq_print_request_state(m, req);
+}
+#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
+
+static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+ seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
+ seq_print_one_request(m, req, now);
+}
+
+static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ struct drbd_md_io tmp;
+ /* In theory this is racy,
+ * in the sense that there could have been a
+ * drbd_md_put_buffer(); drbd_md_get_buffer();
+ * between accessing these members here. */
+ tmp = device->md_io;
+ if (atomic_read(&tmp.in_use)) {
+ seq_printf(m, "%u\t%u\t%d\t",
+ device->minor, device->vnr,
+ jiffies_to_msecs(now - tmp.start_jif));
+ if (time_before(tmp.submit_jif, tmp.start_jif))
+ seq_puts(m, "-\t");
+ else
+ seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif));
+ seq_printf(m, "%s\n", tmp.current_use);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\tage\t#waiting\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ unsigned long jif;
+ struct drbd_request *req;
+ int n = atomic_read(&device->ap_actlog_cnt);
+ if (n) {
+ spin_lock_irq(&device->resource->req_lock);
+ req = list_first_entry_or_null(&device->pending_master_completion[1],
+ struct drbd_request, req_pending_master_completion);
+ /* if the oldest request does not wait for the activity log
+ * it is not interesting for us here */
+ if (req && !(req->rq_state & RQ_IN_ACT_LOG))
+ jif = req->start_jif;
+ else
+ req = NULL;
+ spin_unlock_irq(&device->resource->req_lock);
+ }
+ if (n) {
+ seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+ if (req)
+ seq_printf(m, "%u\t", jiffies_to_msecs(now - jif));
+ else
+ seq_puts(m, "-\t");
+ seq_printf(m, "%u\n", n);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now)
+{
+ struct drbd_bm_aio_ctx *ctx;
+ unsigned long start_jif;
+ unsigned int in_flight;
+ unsigned int flags;
+ spin_lock_irq(&device->resource->req_lock);
+ ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list);
+ if (ctx && ctx->done)
+ ctx = NULL;
+ if (ctx) {
+ start_jif = ctx->start_jif;
+ in_flight = atomic_read(&ctx->in_flight);
+ flags = ctx->flags;
+ }
+ spin_unlock_irq(&device->resource->req_lock);
+ if (ctx) {
+ seq_printf(m, "%u\t%u\t%c\t%u\t%u\n",
+ device->minor, device->vnr,
+ (flags & BM_AIO_READ) ? 'R' : 'W',
+ jiffies_to_msecs(now - start_jif),
+ in_flight);
+ }
+}
+
+static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ seq_print_device_bitmap_io(m, device, now);
+ }
+ rcu_read_unlock();
+}
+
+/* pretty print enum peer_req->flags */
+static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req)
+{
+ unsigned long f = peer_req->flags;
+ char sep = ' ';
+
+ __seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing");
+ __seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal");
+ seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
+ seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
+ seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
+
+ if (f & EE_IS_TRIM) {
+ seq_putc(m, sep);
+ sep = '|';
+ if (f & EE_IS_TRIM_USE_ZEROOUT)
+ seq_puts(m, "zero-out");
+ else
+ seq_puts(m, "trim");
+ }
+ seq_putc(m, '\n');
+}
+
+static void seq_print_peer_request(struct seq_file *m,
+ struct drbd_device *device, struct list_head *lh,
+ unsigned long now)
+{
+ bool reported_preparing = false;
+ struct drbd_peer_request *peer_req;
+ list_for_each_entry(peer_req, lh, w.list) {
+ if (reported_preparing && !(peer_req->flags & EE_SUBMITTED))
+ continue;
+
+ if (device)
+ seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+
+ seq_printf(m, "%llu\t%u\t%c\t%u\t",
+ (unsigned long long)peer_req->i.sector, peer_req->i.size >> 9,
+ (peer_req->flags & EE_WRITE) ? 'W' : 'R',
+ jiffies_to_msecs(now - peer_req->submit_jif));
+ seq_print_peer_request_flags(m, peer_req);
+ if (peer_req->flags & EE_SUBMITTED)
+ break;
+ else
+ reported_preparing = true;
+ }
+}
+
+static void seq_print_device_peer_requests(struct seq_file *m,
+ struct drbd_device *device, unsigned long now)
+{
+ seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n");
+ spin_lock_irq(&device->resource->req_lock);
+ seq_print_peer_request(m, device, &device->active_ee, now);
+ seq_print_peer_request(m, device, &device->read_ee, now);
+ seq_print_peer_request(m, device, &device->sync_ee, now);
+ spin_unlock_irq(&device->resource->req_lock);
+ if (test_bit(FLUSH_PENDING, &device->flags)) {
+ seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n",
+ device->minor, device->vnr,
+ jiffies_to_msecs(now - device->flush_jif));
+ }
+}
+
+static void seq_print_resource_pending_peer_requests(struct seq_file *m,
+ struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ seq_print_device_peer_requests(m, device, now);
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_resource_transfer_log_summary(struct seq_file *m,
+ struct drbd_resource *resource,
+ struct drbd_connection *connection,
+ unsigned long now)
+{
+ struct drbd_request *req;
+ unsigned int count = 0;
+ unsigned int show_state = 0;
+
+ seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
+ spin_lock_irq(&resource->req_lock);
+ list_for_each_entry(req, &connection->transfer_log, tl_requests) {
+ unsigned int tmp = 0;
+ unsigned int s;
+ ++count;
+
+ /* don't disable irq "forever" */
+ if (!(count & 0x1ff)) {
+ struct drbd_request *req_next;
+ kref_get(&req->kref);
+ spin_unlock_irq(&resource->req_lock);
+ cond_resched();
+ spin_lock_irq(&resource->req_lock);
+ req_next = list_next_entry(req, tl_requests);
+ if (kref_put(&req->kref, drbd_req_destroy))
+ req = req_next;
+ if (&req->tl_requests == &connection->transfer_log)
+ break;
+ }
+
+ s = req->rq_state;
+
+ /* This is meant to summarize timing issues, to be able to tell
+ * local disk problems from network problems.
+ * Skip requests, if we have shown an even older request with
+ * similar aspects already. */
+ if (req->master_bio == NULL)
+ tmp |= 1;
+ if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
+ tmp |= 2;
+ if (s & RQ_NET_MASK) {
+ if (!(s & RQ_NET_SENT))
+ tmp |= 4;
+ if (s & RQ_NET_PENDING)
+ tmp |= 8;
+ if (!(s & RQ_NET_DONE))
+ tmp |= 16;
+ }
+ if ((tmp & show_state) == tmp)
+ continue;
+ show_state |= tmp;
+ seq_printf(m, "%u\t", count);
+ seq_print_minor_vnr_req(m, req, now);
+ if (show_state == 0x1f)
+ break;
+ }
+ spin_unlock_irq(&resource->req_lock);
+}
+
+/* TODO: transfer_log and friends should be moved to resource */
+static int in_flight_summary_show(struct seq_file *m, void *pos)
+{
+ struct drbd_resource *resource = m->private;
+ struct drbd_connection *connection;
+ unsigned long jif = jiffies;
+
+ connection = first_connection(resource);
+ /* This does not happen, actually.
+ * But be robust and prepare for future code changes. */
+ if (!connection || !kref_get_unless_zero(&connection->kref))
+ return -ESTALE;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, "oldest bitmap IO\n");
+ seq_print_resource_pending_bitmap_io(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "meta data IO\n");
+ seq_print_resource_pending_meta_io(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "socket buffer stats\n");
+ /* for each connection ... once we have more than one */
+ rcu_read_lock();
+ if (connection->data.socket) {
+ /* open coded SIOCINQ, the "relevant" part */
+ struct tcp_sock *tp = tcp_sk(connection->data.socket->sk);
+ int answ = tp->rcv_nxt - tp->copied_seq;
+ seq_printf(m, "unread receive buffer: %u Byte\n", answ);
+ /* open coded SIOCOUTQ, the "relevant" part */
+ answ = tp->write_seq - tp->snd_una;
+ seq_printf(m, "unacked send buffer: %u Byte\n", answ);
+ }
+ rcu_read_unlock();
+ seq_putc(m, '\n');
+
+ seq_puts(m, "oldest peer requests\n");
+ seq_print_resource_pending_peer_requests(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "application requests waiting for activity log\n");
+ seq_print_waiting_for_AL(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "oldest application requests\n");
+ seq_print_resource_transfer_log_summary(m, resource, connection, jif);
+ seq_putc(m, '\n');
+
+ jif = jiffies - jif;
+ if (jif)
+ seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return 0;
+}
+
+/* simple_positive(file->f_dentry) respectively debugfs_positive(),
+ * but neither is "reachable" from here.
+ * So we have our own inline version of it above. :-( */
+static inline int debugfs_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/* make sure at *open* time that the respective object won't go away. */
+static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
+ void *data, struct kref *kref,
+ void (*release)(struct kref *))
+{
+ struct dentry *parent;
+ int ret = -ESTALE;
+
+ /* Are we still linked,
+ * or has debugfs_remove() already been called? */
+ parent = file->f_dentry->d_parent;
+ /* not sure if this can happen: */
+ if (!parent || !parent->d_inode)
+ goto out;
+ /* serialize with d_delete() */
+ mutex_lock(&parent->d_inode->i_mutex);
+ /* Make sure the object is still alive */
+ if (debugfs_positive(file->f_dentry)
+ && kref_get_unless_zero(kref))
+ ret = 0;
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (!ret) {
+ ret = single_open(file, show, data);
+ if (ret)
+ kref_put(kref, release);
+ }
+out:
+ return ret;
+}
+
+static int in_flight_summary_open(struct inode *inode, struct file *file)
+{
+ struct drbd_resource *resource = inode->i_private;
+ return drbd_single_open(file, in_flight_summary_show, resource,
+ &resource->kref, drbd_destroy_resource);
+}
+
+static int in_flight_summary_release(struct inode *inode, struct file *file)
+{
+ struct drbd_resource *resource = inode->i_private;
+ kref_put(&resource->kref, drbd_destroy_resource);
+ return single_release(inode, file);
+}
+
+static const struct file_operations in_flight_summary_fops = {
+ .owner = THIS_MODULE,
+ .open = in_flight_summary_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = in_flight_summary_release,
+};
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource)
+{
+ struct dentry *dentry;
+ if (!drbd_debugfs_resources)
+ return;
+
+ dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res = dentry;
+
+ dentry = debugfs_create_dir("volumes", resource->debugfs_res);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_volumes = dentry;
+
+ dentry = debugfs_create_dir("connections", resource->debugfs_res);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_connections = dentry;
+
+ dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
+ resource->debugfs_res, resource,
+ &in_flight_summary_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_in_flight_summary = dentry;
+ return;
+
+fail:
+ drbd_debugfs_resource_cleanup(resource);
+ drbd_err(resource, "failed to create debugfs dentry\n");
+}
+
+static void drbd_debugfs_remove(struct dentry **dp)
+{
+ debugfs_remove(*dp);
+ *dp = NULL;
+}
+
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource)
+{
+ /* it is ok to call debugfs_remove(NULL) */
+ drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary);
+ drbd_debugfs_remove(&resource->debugfs_res_connections);
+ drbd_debugfs_remove(&resource->debugfs_res_volumes);
+ drbd_debugfs_remove(&resource->debugfs_res);
+}
+
+static void seq_print_one_timing_detail(struct seq_file *m,
+ const struct drbd_thread_timing_details *tdp,
+ unsigned long now)
+{
+ struct drbd_thread_timing_details td;
+ /* No locking...
+ * use temporary assignment to get at consistent data. */
+ do {
+ td = *tdp;
+ } while (td.cb_nr != tdp->cb_nr);
+ if (!td.cb_addr)
+ return;
+ seq_printf(m, "%u\t%d\t%s:%u\t%ps\n",
+ td.cb_nr,
+ jiffies_to_msecs(now - td.start_jif),
+ td.caller_fn, td.line,
+ td.cb_addr);
+}
+
+static void seq_print_timing_details(struct seq_file *m,
+ const char *title,
+ unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now)
+{
+ unsigned int start_idx;
+ unsigned int i;
+
+ seq_printf(m, "%s\n", title);
+ /* If not much is going on, this will result in natural ordering.
+ * If it is very busy, we will possibly skip events, or even see wrap
+ * arounds, which could only be avoided with locking.
+ */
+ start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST;
+ for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++)
+ seq_print_one_timing_detail(m, tdp+i, now);
+ for (i = 0; i < start_idx; i++)
+ seq_print_one_timing_detail(m, tdp+i, now);
+}
+
+static int callback_history_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_connection *connection = m->private;
+ unsigned long jif = jiffies;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, "n\tage\tcallsite\tfn\n");
+ seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif);
+ seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif);
+ return 0;
+}
+
+static int callback_history_open(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ return drbd_single_open(file, callback_history_show, connection,
+ &connection->kref, drbd_destroy_connection);
+}
+
+static int callback_history_release(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return single_release(inode, file);
+}
+
+static const struct file_operations connection_callback_history_fops = {
+ .owner = THIS_MODULE,
+ .open = callback_history_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = callback_history_release,
+};
+
+static int connection_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_connection *connection = m->private;
+ unsigned long now = jiffies;
+ struct drbd_request *r1, *r2;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ spin_lock_irq(&connection->resource->req_lock);
+ r1 = connection->req_next;
+ if (r1)
+ seq_print_minor_vnr_req(m, r1, now);
+ r2 = connection->req_ack_pending;
+ if (r2 && r2 != r1) {
+ r1 = r2;
+ seq_print_minor_vnr_req(m, r1, now);
+ }
+ r2 = connection->req_not_net_done;
+ if (r2 && r2 != r1)
+ seq_print_minor_vnr_req(m, r2, now);
+ spin_unlock_irq(&connection->resource->req_lock);
+ return 0;
+}
+
+static int connection_oldest_requests_open(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ return drbd_single_open(file, connection_oldest_requests_show, connection,
+ &connection->kref, drbd_destroy_connection);
+}
+
+static int connection_oldest_requests_release(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return single_release(inode, file);
+}
+
+static const struct file_operations connection_oldest_requests_fops = {
+ .owner = THIS_MODULE,
+ .open = connection_oldest_requests_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = connection_oldest_requests_release,
+};
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection)
+{
+ struct dentry *conns_dir = connection->resource->debugfs_res_connections;
+ struct dentry *dentry;
+ if (!conns_dir)
+ return;
+
+ /* Once we enable mutliple peers,
+ * these connections will have descriptive names.
+ * For now, it is just the one connection to the (only) "peer". */
+ dentry = debugfs_create_dir("peer", conns_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn = dentry;
+
+ dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
+ connection->debugfs_conn, connection,
+ &connection_callback_history_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn_callback_history = dentry;
+
+ dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
+ connection->debugfs_conn, connection,
+ &connection_oldest_requests_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn_oldest_requests = dentry;
+ return;
+
+fail:
+ drbd_debugfs_connection_cleanup(connection);
+ drbd_err(connection, "failed to create debugfs dentry\n");
+}
+
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection)
+{
+ drbd_debugfs_remove(&connection->debugfs_conn_callback_history);
+ drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests);
+ drbd_debugfs_remove(&connection->debugfs_conn);
+}
+
+static void resync_dump_detail(struct seq_file *m, struct lc_element *e)
+{
+ struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+ seq_printf(m, "%5d %s %s %s\n", bme->rs_left,
+ test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------",
+ test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------",
+ test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------"
+ );
+}
+
+static int device_resync_extents_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ if (get_ldev_if_state(device, D_FAILED)) {
+ lc_seq_printf_stats(m, device->resync);
+ lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail);
+ put_ldev(device);
+ }
+ return 0;
+}
+
+static int device_act_log_extents_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ if (get_ldev_if_state(device, D_FAILED)) {
+ lc_seq_printf_stats(m, device->act_log);
+ lc_seq_dump_details(m, device->act_log, "", NULL);
+ put_ldev(device);
+ }
+ return 0;
+}
+
+static int device_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+ struct drbd_resource *resource = device->resource;
+ unsigned long now = jiffies;
+ struct drbd_request *r1, *r2;
+ int i;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, RQ_HDR);
+ spin_lock_irq(&resource->req_lock);
+ /* WRITE, then READ */
+ for (i = 1; i >= 0; --i) {
+ r1 = list_first_entry_or_null(&device->pending_master_completion[i],
+ struct drbd_request, req_pending_master_completion);
+ r2 = list_first_entry_or_null(&device->pending_completion[i],
+ struct drbd_request, req_pending_local);
+ if (r1)
+ seq_print_one_request(m, r1, now);
+ if (r2 && r2 != r1)
+ seq_print_one_request(m, r2, now);
+ }
+ spin_unlock_irq(&resource->req_lock);
+ return 0;
+}
+
+static int device_data_gen_id_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+ struct drbd_md *md;
+ enum drbd_uuid_index idx;
+
+ if (!get_ldev_if_state(device, D_FAILED))
+ return -ENODEV;
+
+ md = &device->ldev->md;
+ spin_lock_irq(&md->uuid_lock);
+ for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) {
+ seq_printf(m, "0x%016llX\n", md->uuid[idx]);
+ }
+ spin_unlock_irq(&md->uuid_lock);
+ put_ldev(device);
+ return 0;
+}
+
+#define drbd_debugfs_device_attr(name) \
+static int device_ ## name ## _open(struct inode *inode, struct file *file) \
+{ \
+ struct drbd_device *device = inode->i_private; \
+ return drbd_single_open(file, device_ ## name ## _show, device, \
+ &device->kref, drbd_destroy_device); \
+} \
+static int device_ ## name ## _release(struct inode *inode, struct file *file) \
+{ \
+ struct drbd_device *device = inode->i_private; \
+ kref_put(&device->kref, drbd_destroy_device); \
+ return single_release(inode, file); \
+} \
+static const struct file_operations device_ ## name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = device_ ## name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = device_ ## name ## _release, \
+};
+
+drbd_debugfs_device_attr(oldest_requests)
+drbd_debugfs_device_attr(act_log_extents)
+drbd_debugfs_device_attr(resync_extents)
+drbd_debugfs_device_attr(data_gen_id)
+
+void drbd_debugfs_device_add(struct drbd_device *device)
+{
+ struct dentry *vols_dir = device->resource->debugfs_res_volumes;
+ char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */
+ char vnr_buf[8]; /* volume number vnr is even 16 bit only; */
+ char *slink_name = NULL;
+
+ struct dentry *dentry;
+ if (!vols_dir || !drbd_debugfs_minors)
+ return;
+
+ snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr);
+ dentry = debugfs_create_dir(vnr_buf, vols_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ device->debugfs_vol = dentry;
+
+ snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor);
+ slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u",
+ device->resource->name, device->vnr);
+ if (!slink_name)
+ goto fail;
+ dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name);
+ kfree(slink_name);
+ slink_name = NULL;
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ device->debugfs_minor = dentry;
+
+#define DCF(name) do { \
+ dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
+ device->debugfs_vol, device, \
+ &device_ ## name ## _fops); \
+ if (IS_ERR_OR_NULL(dentry)) \
+ goto fail; \
+ device->debugfs_vol_ ## name = dentry; \
+ } while (0)
+
+ DCF(oldest_requests);
+ DCF(act_log_extents);
+ DCF(resync_extents);
+ DCF(data_gen_id);
+#undef DCF
+ return;
+
+fail:
+ drbd_debugfs_device_cleanup(device);
+ drbd_err(device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_device_cleanup(struct drbd_device *device)
+{
+ drbd_debugfs_remove(&device->debugfs_minor);
+ drbd_debugfs_remove(&device->debugfs_vol_oldest_requests);
+ drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
+ drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
+ drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+ drbd_debugfs_remove(&device->debugfs_vol);
+}
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device)
+{
+ struct dentry *conn_dir = peer_device->connection->debugfs_conn;
+ struct dentry *dentry;
+ char vnr_buf[8];
+
+ if (!conn_dir)
+ return;
+
+ snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr);
+ dentry = debugfs_create_dir(vnr_buf, conn_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ peer_device->debugfs_peer_dev = dentry;
+ return;
+
+fail:
+ drbd_debugfs_peer_device_cleanup(peer_device);
+ drbd_err(peer_device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device)
+{
+ drbd_debugfs_remove(&peer_device->debugfs_peer_dev);
+}
+
+static int drbd_version_show(struct seq_file *m, void *ignored)
+{
+ seq_printf(m, "# %s\n", drbd_buildtag());
+ seq_printf(m, "VERSION=%s\n", REL_VERSION);
+ seq_printf(m, "API_VERSION=%u\n", API_VERSION);
+ seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
+ seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
+ return 0;
+}
+
+static int drbd_version_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, drbd_version_show, NULL);
+}
+
+static struct file_operations drbd_version_fops = {
+ .owner = THIS_MODULE,
+ .open = drbd_version_open,
+ .llseek = seq_lseek,
+ .read = seq_read,
+ .release = single_release,
+};
+
+/* not __exit, may be indirectly called
+ * from the module-load-failure path as well. */
+void drbd_debugfs_cleanup(void)
+{
+ drbd_debugfs_remove(&drbd_debugfs_resources);
+ drbd_debugfs_remove(&drbd_debugfs_minors);
+ drbd_debugfs_remove(&drbd_debugfs_version);
+ drbd_debugfs_remove(&drbd_debugfs_root);
+}
+
+int __init drbd_debugfs_init(void)
+{
+ struct dentry *dentry;
+
+ dentry = debugfs_create_dir("drbd", NULL);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_root = dentry;
+
+ dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_version = dentry;
+
+ dentry = debugfs_create_dir("resources", drbd_debugfs_root);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_resources = dentry;
+
+ dentry = debugfs_create_dir("minors", drbd_debugfs_root);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_minors = dentry;
+ return 0;
+
+fail:
+ drbd_debugfs_cleanup();
+ if (dentry)
+ return PTR_ERR(dentry);
+ else
+ return -EINVAL;
+}
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
new file mode 100644
index 00000000000..8bee21340dc
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -0,0 +1,39 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+
+#include "drbd_int.h"
+
+#ifdef CONFIG_DEBUG_FS
+int __init drbd_debugfs_init(void);
+void drbd_debugfs_cleanup(void);
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource);
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection);
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection);
+
+void drbd_debugfs_device_add(struct drbd_device *device);
+void drbd_debugfs_device_cleanup(struct drbd_device *device);
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device);
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device);
+#else
+
+static inline int __init drbd_debugfs_init(void) { return -ENODEV; }
+static inline void drbd_debugfs_cleanup(void) { }
+
+static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
+static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }
+
+static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { }
+static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { }
+
+static inline void drbd_debugfs_device_add(struct drbd_device *device) { }
+static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { }
+
+static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { }
+static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { }
+
+#endif
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a76ceb344d6..1a000016ccd 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -317,7 +317,63 @@ struct drbd_request {
struct list_head tl_requests; /* ring list in the transfer log */
struct bio *master_bio; /* master bio pointer */
- unsigned long start_time;
+
+ /* see struct drbd_device */
+ struct list_head req_pending_master_completion;
+ struct list_head req_pending_local;
+
+ /* for generic IO accounting */
+ unsigned long start_jif;
+
+ /* for DRBD internal statistics */
+
+ /* Minimal set of time stamps to determine if we wait for activity log
+ * transactions, local disk or peer. 32 bit "jiffies" are good enough,
+ * we don't expect a DRBD request to be stalled for several month.
+ */
+
+ /* before actual request processing */
+ unsigned long in_actlog_jif;
+
+ /* local disk */
+ unsigned long pre_submit_jif;
+
+ /* per connection */
+ unsigned long pre_send_jif;
+ unsigned long acked_jif;
+ unsigned long net_done_jif;
+
+ /* Possibly even more detail to track each phase:
+ * master_completion_jif
+ * how long did it take to complete the master bio
+ * (application visible latency)
+ * allocated_jif
+ * how long the master bio was blocked until we finally allocated
+ * a tracking struct
+ * in_actlog_jif
+ * how long did we wait for activity log transactions
+ *
+ * net_queued_jif
+ * when did we finally queue it for sending
+ * pre_send_jif
+ * when did we start sending it
+ * post_send_jif
+ * how long did we block in the network stack trying to send it
+ * acked_jif
+ * when did we receive (or fake, in protocol A) a remote ACK
+ * net_done_jif
+ * when did we receive final acknowledgement (P_BARRIER_ACK),
+ * or decide, e.g. on connection loss, that we do no longer expect
+ * anything from this peer for this request.
+ *
+ * pre_submit_jif
+ * post_sub_jif
+ * when did we start submiting to the lower level device,
+ * and how long did we block in that submit function
+ * local_completion_jif
+ * how long did it take the lower level device to complete this request
+ */
+
/* once it hits 0, we may complete the master_bio */
atomic_t completion_ref;
@@ -366,6 +422,7 @@ struct drbd_peer_request {
struct drbd_interval i;
/* see comments on ee flag bits below */
unsigned long flags;
+ unsigned long submit_jif;
union {
u64 block_id;
struct digest_info *digest;
@@ -408,6 +465,17 @@ enum {
/* Is set when net_conf had two_primaries set while creating this peer_req */
__EE_IN_INTERVAL_TREE,
+
+ /* for debugfs: */
+ /* has this been submitted, or does it still wait for something else? */
+ __EE_SUBMITTED,
+
+ /* this is/was a write request */
+ __EE_WRITE,
+
+ /* this originates from application on peer
+ * (not some resync or verify or other DRBD internal request) */
+ __EE_APPLICATION,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -419,6 +487,9 @@ enum {
#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK)
#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
+#define EE_SUBMITTED (1<<__EE_SUBMITTED)
+#define EE_WRITE (1<<__EE_WRITE)
+#define EE_APPLICATION (1<<__EE_APPLICATION)
/* flag bits per device */
enum {
@@ -433,11 +504,11 @@ enum {
CONSIDER_RESYNC,
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
+
SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
- GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed, returned IO error */
WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */
FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
@@ -450,6 +521,20 @@ enum {
B_RS_H_DONE, /* Before resync handler done (already executed) */
DISCARD_MY_DATA, /* discard_my_data flag per volume */
READ_BALANCE_RR,
+
+ FLUSH_PENDING, /* if set, device->flush_jif is when we submitted that flush
+ * from drbd_flush_after_epoch() */
+
+ /* cleared only after backing device related structures have been destroyed. */
+ GOING_DISKLESS, /* Disk is being detached, because of io-error, or admin request. */
+
+ /* to be used in drbd_device_post_work() */
+ GO_DISKLESS, /* tell worker to schedule cleanup before detach */
+ DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */
+ MD_SYNC, /* tell worker to call drbd_md_sync() */
+ RS_START, /* tell worker to start resync/OV */
+ RS_PROGRESS, /* tell worker that resync made significant progress */
+ RS_DONE, /* tell worker that resync is done */
};
struct drbd_bitmap; /* opaque for drbd_device */
@@ -531,6 +616,11 @@ struct drbd_backing_dev {
};
struct drbd_md_io {
+ struct page *page;
+ unsigned long start_jif; /* last call to drbd_md_get_buffer */
+ unsigned long submit_jif; /* last _drbd_md_sync_page_io() submit */
+ const char *current_use;
+ atomic_t in_use;
unsigned int done;
int error;
};
@@ -577,10 +667,18 @@ enum {
* and potentially deadlock on, this drbd worker.
*/
DISCONNECT_SENT,
+
+ DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
};
struct drbd_resource {
char *name;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_res;
+ struct dentry *debugfs_res_volumes;
+ struct dentry *debugfs_res_connections;
+ struct dentry *debugfs_res_in_flight_summary;
+#endif
struct kref kref;
struct idr devices; /* volume number to device mapping */
struct list_head connections;
@@ -594,12 +692,28 @@ struct drbd_resource {
unsigned susp_nod:1; /* IO suspended because no data */
unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
+ enum write_ordering_e write_ordering;
+
cpumask_var_t cpu_mask;
};
+struct drbd_thread_timing_details
+{
+ unsigned long start_jif;
+ void *cb_addr;
+ const char *caller_fn;
+ unsigned int line;
+ unsigned int cb_nr;
+};
+
struct drbd_connection {
struct list_head connections;
struct drbd_resource *resource;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_conn;
+ struct dentry *debugfs_conn_callback_history;
+ struct dentry *debugfs_conn_oldest_requests;
+#endif
struct kref kref;
struct idr peer_devices; /* volume number to peer device mapping */
enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */
@@ -636,7 +750,6 @@ struct drbd_connection {
struct drbd_epoch *current_epoch;
spinlock_t epoch_lock;
unsigned int epochs;
- enum write_ordering_e write_ordering;
atomic_t current_tle_nr; /* transfer log epoch number */
unsigned current_tle_writes; /* writes seen within this tl epoch */
@@ -645,9 +758,22 @@ struct drbd_connection {
struct drbd_thread worker;
struct drbd_thread asender;
+ /* cached pointers,
+ * so we can look up the oldest pending requests more quickly.
+ * protected by resource->req_lock */
+ struct drbd_request *req_next; /* DRBD 9: todo.req_next */
+ struct drbd_request *req_ack_pending;
+ struct drbd_request *req_not_net_done;
+
/* sender side */
struct drbd_work_queue sender_work;
+#define DRBD_THREAD_DETAILS_HIST 16
+ unsigned int w_cb_nr; /* keeps counting up */
+ unsigned int r_cb_nr; /* keeps counting up */
+ struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST];
+ struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
+
struct {
/* whether this sender thread
* has processed a single write yet. */
@@ -663,11 +789,22 @@ struct drbd_connection {
} send;
};
+void __update_timing_details(
+ struct drbd_thread_timing_details *tdp,
+ unsigned int *cb_nr,
+ void *cb,
+ const char *fn, const unsigned int line);
+
+#define update_worker_timing_details(c, cb) \
+ __update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ )
+#define update_receiver_timing_details(c, cb) \
+ __update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ )
+
struct submit_worker {
struct workqueue_struct *wq;
struct work_struct worker;
- spinlock_t lock;
+ /* protected by ..->resource->req_lock */
struct list_head writes;
};
@@ -675,12 +812,29 @@ struct drbd_peer_device {
struct list_head peer_devices;
struct drbd_device *device;
struct drbd_connection *connection;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_peer_dev;
+#endif
};
struct drbd_device {
struct drbd_resource *resource;
struct list_head peer_devices;
- int vnr; /* volume number within the connection */
+ struct list_head pending_bitmap_io;
+
+ unsigned long flush_jif;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_minor;
+ struct dentry *debugfs_vol;
+ struct dentry *debugfs_vol_oldest_requests;
+ struct dentry *debugfs_vol_act_log_extents;
+ struct dentry *debugfs_vol_resync_extents;
+ struct dentry *debugfs_vol_data_gen_id;
+#endif
+
+ unsigned int vnr; /* volume number within the connection */
+ unsigned int minor; /* device minor number */
+
struct kref kref;
/* things that are stored as / read from meta data on disk */
@@ -697,19 +851,10 @@ struct drbd_device {
unsigned long last_reattach_jif;
struct drbd_work resync_work;
struct drbd_work unplug_work;
- struct drbd_work go_diskless;
- struct drbd_work md_sync_work;
- struct drbd_work start_resync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
struct timer_list start_resync_timer;
struct timer_list request_timer;
-#ifdef DRBD_DEBUG_MD_SYNC
- struct {
- unsigned int line;
- const char* func;
- } last_md_mark_dirty;
-#endif
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@@ -724,6 +869,7 @@ struct drbd_device {
unsigned int al_writ_cnt;
unsigned int bm_writ_cnt;
atomic_t ap_bio_cnt; /* Requests we need to complete */
+ atomic_t ap_actlog_cnt; /* Requests waiting for activity log */
atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
atomic_t unacked_cnt; /* Need to send replies for */
@@ -733,6 +879,13 @@ struct drbd_device {
struct rb_root read_requests;
struct rb_root write_requests;
+ /* for statistics and timeouts */
+ /* [0] read, [1] write */
+ struct list_head pending_master_completion[2];
+ struct list_head pending_completion[2];
+
+ /* use checksums for *this* resync */
+ bool use_csums;
/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
unsigned long rs_total;
/* number of resync blocks that failed in this run */
@@ -788,9 +941,7 @@ struct drbd_device {
atomic_t pp_in_use; /* allocated from page pool */
atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
wait_queue_head_t ee_wait;
- struct page *md_io_page; /* one page buffer for md_io */
struct drbd_md_io md_io;
- atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */
spinlock_t al_lock;
wait_queue_head_t al_wait;
struct lru_cache *act_log; /* activity log */
@@ -800,7 +951,6 @@ struct drbd_device {
atomic_t packet_seq;
unsigned int peer_seq;
spinlock_t peer_seq_lock;
- unsigned int minor;
unsigned long comm_bm_set; /* communicated number of set bits. */
struct bm_io_work bm_io_work;
u64 ed_uuid; /* UUID of the exposed data */
@@ -824,6 +974,21 @@ struct drbd_device {
struct submit_worker submit;
};
+struct drbd_bm_aio_ctx {
+ struct drbd_device *device;
+ struct list_head list; /* on device->pending_bitmap_io */;
+ unsigned long start_jif;
+ atomic_t in_flight;
+ unsigned int done;
+ unsigned flags;
+#define BM_AIO_COPY_PAGES 1
+#define BM_AIO_WRITE_HINTED 2
+#define BM_AIO_WRITE_ALL_PAGES 4
+#define BM_AIO_READ 8
+ int error;
+ struct kref kref;
+};
+
struct drbd_config_context {
/* assigned from drbd_genlmsghdr */
unsigned int minor;
@@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
extern int drbd_send_bitmap(struct drbd_device *device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
void drbd_print_uuids(struct drbd_device *device, const char *text);
@@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must
extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
-#ifndef DRBD_DEBUG_MD_SYNC
extern void drbd_md_mark_dirty(struct drbd_device *device);
-#else
-#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
-extern void drbd_md_mark_dirty_(struct drbd_device *device,
- unsigned int line, const char *func);
-#endif
extern void drbd_queue_bitmap_io(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
void (*done)(struct drbd_device *, int),
@@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device,
extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
char *why, enum bm_flag flags);
-extern int drbd_bmio_set_n_write(struct drbd_device *device);
-extern int drbd_bmio_clear_n_write(struct drbd_device *device);
-extern void drbd_ldev_destroy(struct drbd_device *device);
+extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
+extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
/* Meta data layout
*
@@ -1105,17 +1263,21 @@ struct bm_extent {
/* in which _bitmap_ extent (resp. sector) the bit for a certain
* _storage_ sector is located in */
#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
+#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
-/* how much _storage_ sectors we have per bitmap sector */
+/* first storage sector a bitmap extent corresponds to */
#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
+/* how much _storage_ sectors we have per bitmap extent */
#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
+/* how many bits are covered by one bitmap extent (resync extent) */
+#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
+
+#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1)
+
/* in one sector of the bitmap, we have this many activity_log extents. */
#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
-#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
-#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
-
/* the extent in "PER_EXTENT" below is an activity log extent
* we need that many (long words/bytes) to store the bitmap
* of one AL_EXTENT_SIZE chunk of storage.
@@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
const unsigned long s, const unsigned long e);
extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
-extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local);
extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
+extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
extern size_t drbd_bm_words(struct drbd_device *device);
@@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon
extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
-extern int drbd_bm_rs_done(struct drbd_device *device);
/* for receive_bitmap */
extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
size_t number, unsigned long *buffer);
@@ -1312,7 +1473,7 @@ enum determine_dev_size {
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_max_bio_size(struct drbd_device *device);
+extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
enum drbd_role new_role,
int force);
@@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device);
extern void suspend_other_sg(struct drbd_device *device);
extern int drbd_resync_finished(struct drbd_device *device);
/* maybe rather drbd_main.c ? */
-extern void *drbd_md_get_buffer(struct drbd_device *device);
+extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
extern void drbd_md_put_buffer(struct drbd_device *device);
extern int drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev, sector_t sector, int rw);
@@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+ bool throttle_if_app_is_waiting);
extern int drbd_submit_peer_request(struct drbd_device *,
struct drbd_peer_request *, const unsigned,
const int);
@@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
{
__release(local);
if (!bio->bi_bdev) {
- printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
- "bio->bi_bdev == NULL\n",
- device_to_minor(device));
- dump_stack();
+ drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
bio_endio(bio, -ENODEV);
return;
}
@@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
generic_make_request(bio);
}
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+ enum write_ordering_e wo);
/* drbd_proc.c */
extern struct proc_dir_entry *drbd_proc;
@@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s);
/* drbd_actlog.c */
extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
+extern void drbd_al_begin_io_commit(struct drbd_device *device);
extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate);
+extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
@@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device);
extern void drbd_rs_failed_io(struct drbd_device *device,
sector_t sector, int size);
extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
-extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector,
- int size, const char *file, const unsigned int line);
+
+enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
+extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+ enum update_sync_bits_mode mode,
+ const char *file, const unsigned int line);
#define drbd_set_in_sync(device, sector, size) \
- __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__)
-extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
- int size, const char *file, const unsigned int line);
+ __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__)
#define drbd_set_out_of_sync(device, sector, size) \
- __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__)
+ __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__)
+#define drbd_rs_failed_io(device, sector, size) \
+ __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__)
extern void drbd_al_shrink(struct drbd_device *device);
extern int drbd_initialize_al(struct drbd_device *, void *);
@@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
}
static inline void
-drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
{
unsigned long flags;
spin_lock_irqsave(&q->q_lock, flags);
- list_add(&w->list, &q->q);
+ list_add_tail(&w->list, &q->q);
spin_unlock_irqrestore(&q->q_lock, flags);
wake_up(&q->q_wait);
}
static inline void
-drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w)
{
unsigned long flags;
spin_lock_irqsave(&q->q_lock, flags);
- list_add_tail(&w->list, &q->q);
+ if (list_empty_careful(&w->list))
+ list_add_tail(&w->list, &q->q);
spin_unlock_irqrestore(&q->q_lock, flags);
wake_up(&q->q_wait);
}
+static inline void
+drbd_device_post_work(struct drbd_device *device, int work_bit)
+{
+ if (!test_and_set_bit(work_bit, &device->flags)) {
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
+ struct drbd_work_queue *q = &connection->sender_work;
+ if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
+ wake_up(&q->q_wait);
+ }
+}
+
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
static inline void wake_asender(struct drbd_connection *connection)
@@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device)
func, line, \
atomic_read(&device->which))
-#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__)
+#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
{
if (atomic_dec_and_test(&device->ap_pending_cnt))
@@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device)
atomic_inc(&device->rs_pending_cnt);
}
-#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__)
+#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
{
atomic_dec(&device->rs_pending_cnt);
@@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device)
atomic_inc(&device->unacked_cnt);
}
-#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__)
+#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
{
atomic_dec(&device->unacked_cnt);
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
}
-#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__)
+#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
{
atomic_sub(n, &device->unacked_cnt);
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
}
+static inline bool is_sync_state(enum drbd_conns connection_state)
+{
+ return
+ (connection_state == C_SYNC_SOURCE
+ || connection_state == C_SYNC_TARGET
+ || connection_state == C_PAUSED_SYNC_S
+ || connection_state == C_PAUSED_SYNC_T);
+}
+
/**
* get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
* @M: DRBD device.
@@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
static inline void put_ldev(struct drbd_device *device)
{
+ enum drbd_disk_state ds = device->state.disk;
+ /* We must check the state *before* the atomic_dec becomes visible,
+ * or we have a theoretical race where someone hitting zero,
+ * while state still D_FAILED, will then see D_DISKLESS in the
+ * condition below and calling into destroy, where he must not, yet. */
int i = atomic_dec_return(&device->local_cnt);
/* This may be called from some endio handler,
@@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device)
__release(local);
D_ASSERT(device, i >= 0);
if (i == 0) {
- if (device->state.disk == D_DISKLESS)
+ if (ds == D_DISKLESS)
/* even internal references gone, safe to destroy */
- drbd_ldev_destroy(device);
- if (device->state.disk == D_FAILED) {
+ drbd_device_post_work(device, DESTROY_DISK);
+ if (ds == D_FAILED)
/* all application IO references gone. */
- if (!test_and_set_bit(GO_DISKLESS, &device->flags))
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->go_diskless);
- }
+ if (!test_and_set_bit(GOING_DISKLESS, &device->flags))
+ drbd_device_post_work(device, GO_DISKLESS);
wake_up(&device->misc_wait);
}
}
@@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_
extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
#endif
-/* you must have an "get_ldev" reference */
-static inline void drbd_get_syncer_progress(struct drbd_device *device,
- unsigned long *bits_left, unsigned int *per_mil_done)
-{
- /* this is to break it at compile time when we change that, in case we
- * want to support more than (1<<32) bits on a 32bit arch. */
- typecheck(unsigned long, device->rs_total);
-
- /* note: both rs_total and rs_left are in bits, i.e. in
- * units of BM_BLOCK_SIZE.
- * for the percentage, we don't care. */
-
- if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
- *bits_left = device->ov_left;
- else
- *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
- /* >> 10 to prevent overflow,
- * +1 to prevent division by zero */
- if (*bits_left > device->rs_total) {
- /* doh. maybe a logic bug somewhere.
- * may also be just a race condition
- * between this and a disconnect during sync.
- * for now, just prevent in-kernel buffer overflow.
- */
- smp_rmb();
- drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
- drbd_conn_str(device->state.conn),
- *bits_left, device->rs_total, device->rs_failed);
- *per_mil_done = 0;
- } else {
- /* Make sure the division happens in long context.
- * We allow up to one petabyte storage right now,
- * at a granularity of 4k per bit that is 2**38 bits.
- * After shift right and multiplication by 1000,
- * this should still fit easily into a 32bit long,
- * so we don't need a 64bit division on 32bit arch.
- * Note: currently we don't support such large bitmaps on 32bit
- * arch anyways, but no harm done to be prepared for it here.
- */
- unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
- unsigned long left = *bits_left >> shift;
- unsigned long total = 1UL + (device->rs_total >> shift);
- unsigned long tmp = 1000UL - left * 1000UL/total;
- *per_mil_done = tmp;
- }
-}
-
-
/* this throttles on-the-fly application requests
* according to max_buffers settings;
* maybe re-implement using semaphores? */
@@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device)
return QUEUE_ORDERED_NONE;
}
-static inline void drbd_md_flush(struct drbd_device *device)
-{
- int r;
-
- if (device->ldev == NULL) {
- drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n");
- return;
- }
-
- if (test_bit(MD_NO_FUA, &device->flags))
- return;
-
- r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL);
- if (r) {
- set_bit(MD_NO_FUA, &device->flags);
- drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r);
- }
-}
-
static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
{
return list_first_entry_or_null(&resource->connections,
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f38fcb00c10..f210543f05f 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
unsigned int size; /* size in bytes */
sector_t end; /* highest interval end in subtree */
int local:1 /* local or remote request? */;
- int waiting:1;
+ int waiting:1; /* someone is waiting for this to complete */
+ int completed:1; /* this has been completed already;
+ * ignore for conflict detection */
};
static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 960645c26e6..9b465bb6848 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -26,7 +26,10 @@
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
+#include <linux/jiffies.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
@@ -54,16 +57,14 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
-
#include "drbd_vli.h"
+#include "drbd_debugfs.h"
static DEFINE_MUTEX(drbd_main_mutex);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static void drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_work *w, int unused);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
@@ -264,7 +265,7 @@ bail:
/**
* _tl_restart() - Walks the transfer log, and applies an action to all requests
- * @device: DRBD device.
+ * @connection: DRBD connection to operate on.
* @what: The action/event to perform with all request objects
*
* @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
@@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr,
msg_flags);
if (data && !err)
err = drbd_send_all(connection, sock->socket, data, size, 0);
+ /* DRBD protocol "pings" are latency critical.
+ * This is supposed to trigger tcp_push_pending_frames() */
+ if (!err && (cmd == P_PING || cmd == P_PING_ACK))
+ drbd_tcp_nodelay(sock->socket);
+
return err;
}
@@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
if (peer_device->connection->agreed_pro_version >= 100) {
if (req->rq_state & RQ_EXP_RECEIVE_ACK)
dp_flags |= DP_SEND_RECEIVE_ACK;
- if (req->rq_state & RQ_EXP_WRITE_ACK)
+ /* During resync, request an explicit write ack,
+ * even in protocol != C */
+ if (req->rq_state & RQ_EXP_WRITE_ACK
+ || (dp_flags & DP_MAY_SET_IN_SYNC))
dp_flags |= DP_SEND_WRITE_ACK;
}
p->dp_flags = cpu_to_be32(dp_flags);
@@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
drbd_set_defaults(device);
atomic_set(&device->ap_bio_cnt, 0);
+ atomic_set(&device->ap_actlog_cnt, 0);
atomic_set(&device->ap_pending_cnt, 0);
atomic_set(&device->rs_pending_cnt, 0);
atomic_set(&device->unacked_cnt, 0);
@@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
atomic_set(&device->rs_sect_in, 0);
atomic_set(&device->rs_sect_ev, 0);
atomic_set(&device->ap_in_flight, 0);
- atomic_set(&device->md_io_in_use, 0);
+ atomic_set(&device->md_io.in_use, 0);
mutex_init(&device->own_state_mutex);
device->state_mutex = &device->own_state_mutex;
@@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device)
INIT_LIST_HEAD(&device->resync_reads);
INIT_LIST_HEAD(&device->resync_work.list);
INIT_LIST_HEAD(&device->unplug_work.list);
- INIT_LIST_HEAD(&device->go_diskless.list);
- INIT_LIST_HEAD(&device->md_sync_work.list);
- INIT_LIST_HEAD(&device->start_resync_work.list);
INIT_LIST_HEAD(&device->bm_io_work.w.list);
+ INIT_LIST_HEAD(&device->pending_master_completion[0]);
+ INIT_LIST_HEAD(&device->pending_master_completion[1]);
+ INIT_LIST_HEAD(&device->pending_completion[0]);
+ INIT_LIST_HEAD(&device->pending_completion[1]);
device->resync_work.cb = w_resync_timer;
device->unplug_work.cb = w_send_write_hint;
- device->go_diskless.cb = w_go_diskless;
- device->md_sync_work.cb = w_md_sync;
device->bm_io_work.w.cb = w_bitmap_io;
- device->start_resync_work.cb = w_start_resync;
init_timer(&device->resync_timer);
init_timer(&device->md_sync_timer);
@@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device)
drbd_bm_cleanup(device);
}
- drbd_free_bc(device->ldev);
+ drbd_free_ldev(device->ldev);
device->ldev = NULL;
clear_bit(AL_SUSPENDED, &device->flags);
@@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device)
D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
D_ASSERT(device, list_empty(&device->resync_work.list));
D_ASSERT(device, list_empty(&device->unplug_work.list));
- D_ASSERT(device, list_empty(&device->go_diskless.list));
drbd_set_defaults(device);
}
@@ -2129,20 +2136,6 @@ Enomem:
return -ENOMEM;
}
-static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
- void *unused)
-{
- /* just so we have it. you never know what interesting things we
- * might want to do here some day...
- */
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block drbd_notifier = {
- .notifier_call = drbd_notify_sys,
-};
-
static void drbd_release_all_peer_reqs(struct drbd_device *device)
{
int rr;
@@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref)
{
struct drbd_device *device = container_of(kref, struct drbd_device, kref);
struct drbd_resource *resource = device->resource;
- struct drbd_connection *connection;
+ struct drbd_peer_device *peer_device, *tmp_peer_device;
del_timer_sync(&device->request_timer);
@@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref)
if (device->this_bdev)
bdput(device->this_bdev);
- drbd_free_bc(device->ldev);
+ drbd_free_ldev(device->ldev);
device->ldev = NULL;
drbd_release_all_peer_reqs(device);
@@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref)
if (device->bitmap) /* should no longer be there. */
drbd_bm_cleanup(device);
- __free_page(device->md_io_page);
+ __free_page(device->md_io.page);
put_disk(device->vdisk);
blk_cleanup_queue(device->rq_queue);
kfree(device->rs_plan_s);
- kfree(first_peer_device(device));
- kfree(device);
- for_each_connection(connection, resource)
- kref_put(&connection->kref, drbd_destroy_connection);
+ /* not for_each_connection(connection, resource):
+ * those may have been cleaned up and disassociated already.
+ */
+ for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
+ kref_put(&peer_device->connection->kref, drbd_destroy_connection);
+ kfree(peer_device);
+ }
+ memset(device, 0xfd, sizeof(*device));
+ kfree(device);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws)
list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
struct drbd_device *device = req->device;
struct bio *bio = req->master_bio;
- unsigned long start_time = req->start_time;
+ unsigned long start_jif = req->start_jif;
bool expected;
expected =
@@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws)
/* We are not just doing generic_make_request(),
* as we want to keep the start_time information. */
inc_ap_bio(device);
- __drbd_make_request(device, bio, start_time);
+ __drbd_make_request(device, bio, start_jif);
}
}
+/* called via drbd_req_put_completion_ref(),
+ * holds resource->req_lock */
void drbd_restart_request(struct drbd_request *req)
{
unsigned long flags;
@@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref)
idr_destroy(&resource->devices);
free_cpumask_var(resource->cpu_mask);
kfree(resource->name);
+ memset(resource, 0xf2, sizeof(*resource));
kfree(resource);
}
@@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource)
for_each_connection_safe(connection, tmp, resource) {
list_del(&connection->connections);
+ drbd_debugfs_connection_cleanup(connection);
kref_put(&connection->kref, drbd_destroy_connection);
}
+ drbd_debugfs_resource_cleanup(resource);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2318,8 +2321,6 @@ static void drbd_cleanup(void)
struct drbd_device *device;
struct drbd_resource *resource, *tmp;
- unregister_reboot_notifier(&drbd_notifier);
-
/* first remove proc,
* drbdsetup uses it's presence to detect
* whether DRBD is loaded.
@@ -2335,6 +2336,7 @@ static void drbd_cleanup(void)
destroy_workqueue(retry.wq);
drbd_genl_unregister();
+ drbd_debugfs_cleanup();
idr_for_each_entry(&drbd_devices, device, i)
drbd_delete_device(device);
@@ -2350,7 +2352,7 @@ static void drbd_cleanup(void)
idr_destroy(&drbd_devices);
- printk(KERN_INFO "drbd: module cleanup done.\n");
+ pr_info("module cleanup done.\n");
}
/**
@@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE,
cpumask_bits(new_cpu_mask), nr_cpu_ids);
+ if (err == -EOVERFLOW) {
+ /* So what. mask it out. */
+ cpumask_var_t tmp_cpu_mask;
+ if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) {
+ cpumask_setall(tmp_cpu_mask);
+ cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask);
+ drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n",
+ res_opts->cpu_mask,
+ strlen(res_opts->cpu_mask) > 12 ? "..." : "",
+ nr_cpu_ids);
+ free_cpumask_var(tmp_cpu_mask);
+ err = 0;
+ }
+ }
if (err) {
drbd_warn(resource, "bitmap_parse() failed with %d\n", err);
/* retcode = ERR_CPU_MASK_PARSE; */
@@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name)
kref_init(&resource->kref);
idr_init(&resource->devices);
INIT_LIST_HEAD(&resource->connections);
+ resource->write_ordering = WO_bdev_flush;
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex);
spin_lock_init(&resource->req_lock);
+ drbd_debugfs_resource_add(resource);
return resource;
fail_free_name:
@@ -2593,7 +2611,7 @@ fail:
return NULL;
}
-/* caller must be under genl_lock() */
+/* caller must be under adm_mutex */
struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
{
struct drbd_resource *resource;
@@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
INIT_LIST_HEAD(&connection->current_epoch->list);
connection->epochs = 1;
spin_lock_init(&connection->epoch_lock);
- connection->write_ordering = WO_bdev_flush;
connection->send.seen_any_write_yet = false;
connection->send.current_epoch_nr = 0;
@@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
kref_get(&resource->kref);
list_add_tail_rcu(&connection->connections, &resource->connections);
+ drbd_debugfs_connection_add(connection);
return connection;
fail_resource:
@@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref)
drbd_free_socket(&connection->data);
kfree(connection->int_dig_in);
kfree(connection->int_dig_vv);
+ memset(connection, 0xfc, sizeof(*connection));
kfree(connection);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device)
return -ENOMEM;
INIT_WORK(&device->submit.worker, do_submit);
- spin_lock_init(&device->submit.lock);
INIT_LIST_HEAD(&device->submit.writes);
return 0;
}
@@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
blk_queue_merge_bvec(q, drbd_merge_bvec);
q->queue_lock = &resource->req_lock;
- device->md_io_page = alloc_page(GFP_KERNEL);
- if (!device->md_io_page)
+ device->md_io.page = alloc_page(GFP_KERNEL);
+ if (!device->md_io.page)
goto out_no_io_page;
if (drbd_bm_init(device))
@@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
kref_get(&device->kref);
INIT_LIST_HEAD(&device->peer_devices);
+ INIT_LIST_HEAD(&device->pending_bitmap_io);
for_each_connection(connection, resource) {
peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL);
if (!peer_device)
@@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
for_each_peer_device(peer_device, device)
drbd_connected(peer_device);
}
-
+ /* move to create_peer_device() */
+ for_each_peer_device(peer_device, device)
+ drbd_debugfs_peer_device_add(peer_device);
+ drbd_debugfs_device_add(device);
return NO_ERROR;
out_idr_remove_vol:
@@ -2853,7 +2875,7 @@ out_idr_remove_minor:
out_no_minor_idr:
drbd_bm_cleanup(device);
out_no_bitmap:
- __free_page(device->md_io_page);
+ __free_page(device->md_io.page);
out_no_io_page:
put_disk(disk);
out_no_disk:
@@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device)
{
struct drbd_resource *resource = device->resource;
struct drbd_connection *connection;
+ struct drbd_peer_device *peer_device;
int refs = 3;
+ /* move to free_peer_device() */
+ for_each_peer_device(peer_device, device)
+ drbd_debugfs_peer_device_cleanup(peer_device);
+ drbd_debugfs_device_cleanup(device);
for_each_connection(connection, resource) {
idr_remove(&connection->peer_devices, device->vnr);
refs++;
@@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device)
kref_sub(&device->kref, refs, drbd_destroy_device);
}
-int __init drbd_init(void)
+static int __init drbd_init(void)
{
int err;
if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
- printk(KERN_ERR
- "drbd: invalid minor_count (%d)\n", minor_count);
+ pr_err("invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
return -EINVAL;
#else
@@ -2897,14 +2923,11 @@ int __init drbd_init(void)
err = register_blkdev(DRBD_MAJOR, "drbd");
if (err) {
- printk(KERN_ERR
- "drbd: unable to register block device major %d\n",
+ pr_err("unable to register block device major %d\n",
DRBD_MAJOR);
return err;
}
- register_reboot_notifier(&drbd_notifier);
-
/*
* allocate all necessary structs
*/
@@ -2918,7 +2941,7 @@ int __init drbd_init(void)
err = drbd_genl_register();
if (err) {
- printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+ pr_err("unable to register generic netlink family\n");
goto fail;
}
@@ -2929,38 +2952,39 @@ int __init drbd_init(void)
err = -ENOMEM;
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
if (!drbd_proc) {
- printk(KERN_ERR "drbd: unable to register proc file\n");
+ pr_err("unable to register proc file\n");
goto fail;
}
retry.wq = create_singlethread_workqueue("drbd-reissue");
if (!retry.wq) {
- printk(KERN_ERR "drbd: unable to create retry workqueue\n");
+ pr_err("unable to create retry workqueue\n");
goto fail;
}
INIT_WORK(&retry.worker, do_retry);
spin_lock_init(&retry.lock);
INIT_LIST_HEAD(&retry.writes);
- printk(KERN_INFO "drbd: initialized. "
+ if (drbd_debugfs_init())
+ pr_notice("failed to initialize debugfs -- will not be available\n");
+
+ pr_info("initialized. "
"Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
- printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
- printk(KERN_INFO "drbd: registered as block device major %d\n",
- DRBD_MAJOR);
-
+ pr_info("%s\n", drbd_buildtag());
+ pr_info("registered as block device major %d\n", DRBD_MAJOR);
return 0; /* Success! */
fail:
drbd_cleanup();
if (err == -ENOMEM)
- printk(KERN_ERR "drbd: ran out of memory\n");
+ pr_err("ran out of memory\n");
else
- printk(KERN_ERR "drbd: initialization failure\n");
+ pr_err("initialization failure\n");
return err;
}
-void drbd_free_bc(struct drbd_backing_dev *ldev)
+void drbd_free_ldev(struct drbd_backing_dev *ldev)
{
if (ldev == NULL)
return;
@@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
kfree(ldev);
}
-void drbd_free_sock(struct drbd_connection *connection)
+static void drbd_free_one_sock(struct drbd_socket *ds)
{
- if (connection->data.socket) {
- mutex_lock(&connection->data.mutex);
- kernel_sock_shutdown(connection->data.socket, SHUT_RDWR);
- sock_release(connection->data.socket);
- connection->data.socket = NULL;
- mutex_unlock(&connection->data.mutex);
- }
- if (connection->meta.socket) {
- mutex_lock(&connection->meta.mutex);
- kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR);
- sock_release(connection->meta.socket);
- connection->meta.socket = NULL;
- mutex_unlock(&connection->meta.mutex);
+ struct socket *s;
+ mutex_lock(&ds->mutex);
+ s = ds->socket;
+ ds->socket = NULL;
+ mutex_unlock(&ds->mutex);
+ if (s) {
+ /* so debugfs does not need to mutex_lock() */
+ synchronize_rcu();
+ kernel_sock_shutdown(s, SHUT_RDWR);
+ sock_release(s);
}
}
+void drbd_free_sock(struct drbd_connection *connection)
+{
+ if (connection->data.socket)
+ drbd_free_one_sock(&connection->data);
+ if (connection->meta.socket)
+ drbd_free_one_sock(&connection->meta);
+}
+
/* meta data management */
void conn_md_sync(struct drbd_connection *connection)
@@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device)
if (!get_ldev_if_state(device, D_FAILED))
return;
- buffer = drbd_md_get_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer)
goto out;
@@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
if (device->state.disk != D_DISKLESS)
return ERR_DISK_CONFIGURED;
- buffer = drbd_md_get_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer)
return ERR_NOMEM;
@@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
*
* Sets all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_set_n_write(struct drbd_device *device)
+int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
{
int rv = -EIO;
- if (get_ldev_if_state(device, D_ATTACHING)) {
- drbd_md_set_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- drbd_bm_set_all(device);
-
- rv = drbd_bm_write(device);
+ drbd_md_set_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
+ drbd_bm_set_all(device);
- if (!rv) {
- drbd_md_clear_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- }
+ rv = drbd_bm_write(device);
- put_ldev(device);
+ if (!rv) {
+ drbd_md_clear_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
}
return rv;
@@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device)
*
* Clears all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_clear_n_write(struct drbd_device *device)
+int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
{
- int rv = -EIO;
-
drbd_resume_al(device);
- if (get_ldev_if_state(device, D_ATTACHING)) {
- drbd_bm_clear_all(device);
- rv = drbd_bm_write(device);
- put_ldev(device);
- }
-
- return rv;
+ drbd_bm_clear_all(device);
+ return drbd_bm_write(device);
}
static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
return 0;
}
-void drbd_ldev_destroy(struct drbd_device *device)
-{
- lc_destroy(device->resync);
- device->resync = NULL;
- lc_destroy(device->act_log);
- device->act_log = NULL;
- __no_warn(local,
- drbd_free_bc(device->ldev);
- device->ldev = NULL;);
-
- clear_bit(GO_DISKLESS, &device->flags);
-}
-
-static int w_go_diskless(struct drbd_work *w, int unused)
-{
- struct drbd_device *device =
- container_of(w, struct drbd_device, go_diskless);
-
- D_ASSERT(device, device->state.disk == D_FAILED);
- /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
- * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
- * the protected members anymore, though, so once put_ldev reaches zero
- * again, it will be safe to free them. */
-
- /* Try to write changed bitmap pages, read errors may have just
- * set some bits outside the area covered by the activity log.
- *
- * If we have an IO error during the bitmap writeout,
- * we will want a full sync next time, just in case.
- * (Do we want a specific meta data flag for this?)
- *
- * If that does not make it to stable storage either,
- * we cannot do anything about that anymore.
- *
- * We still need to check if both bitmap and ldev are present, we may
- * end up here after a failed attach, before ldev was even assigned.
- */
- if (device->bitmap && device->ldev) {
- /* An interrupted resync or similar is allowed to recounts bits
- * while we detach.
- * Any modifications would not be expected anymore, though.
- */
- if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
- "detach", BM_LOCKED_TEST_ALLOWED)) {
- if (test_bit(WAS_READ_ERROR, &device->flags)) {
- drbd_md_set_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- }
- }
- }
-
- drbd_force_state(device, NS(disk, D_DISKLESS));
- return 0;
-}
-
/**
* drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
* @device: DRBD device.
@@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused)
* that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
* called from worker context. It MUST NOT be used while a previous such
* work is still pending!
+ *
+ * Its worker function encloses the call of io_fn() by get_ldev() and
+ * put_ldev().
*/
void drbd_queue_bitmap_io(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
@@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
static void md_sync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
-
- /* must not double-queue! */
- if (list_empty(&device->md_sync_work.list))
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &device->md_sync_work);
-}
-
-static int w_md_sync(struct drbd_work *w, int unused)
-{
- struct drbd_device *device =
- container_of(w, struct drbd_device, md_sync_work);
-
- drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
-#ifdef DEBUG
- drbd_warn(device, "last md_mark_dirty: %s:%u\n",
- device->last_md_mark_dirty.func, device->last_md_mark_dirty.line);
-#endif
- drbd_md_sync(device);
- return 0;
+ drbd_device_post_work(device, MD_SYNC);
}
const char *cmdname(enum drbd_packet cmd)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3f2e1673808..1cd47df44bd 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -23,6 +23,8 @@
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
@@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
if (genlmsg_reply(skb, info))
- printk(KERN_ERR "drbd: error sending genl reply\n");
+ pr_err("error sending genl reply\n");
}
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
@@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection)
}
enum drbd_state_rv
-drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
+drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
const int max_tries = 4;
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
struct net_conf *nc;
@@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
- if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
+ if (conn_try_outdate_peer(connection)) {
val.disk = D_UP_TO_DATE;
mask.disk = D_MASK;
}
@@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
if (rv == SS_NOTHING_TO_DO)
goto out;
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
- if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
+ if (!conn_try_outdate_peer(connection) && force) {
drbd_warn(device, "Forced into split brain situation!\n");
mask.pdsk = D_MASK;
val.pdsk = D_OUTDATED;
@@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
retry at most once more in this case. */
int timeo;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
@@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
/* FIXME also wait for all pending P_BARRIER_ACK? */
if (new_role == R_SECONDARY) {
- set_disk_ro(device->vdisk, true);
if (get_ldev(device)) {
device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
put_ldev(device);
}
} else {
- /* Called from drbd_adm_set_role only.
- * We are still holding the conf_update mutex. */
- nc = first_peer_device(device)->connection->net_conf;
+ mutex_lock(&device->resource->conf_update);
+ nc = connection->net_conf;
if (nc)
nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+ mutex_unlock(&device->resource->conf_update);
- set_disk_ro(device->vdisk, false);
if (get_ldev(device)) {
if (((device->state.conn < C_CONNECTED ||
device->state.pdsk <= D_FAILED)
@@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
if (device->state.conn >= C_WF_REPORT_PARAMS) {
/* if this was forced, we should consider sync */
if (forced)
- drbd_send_uuids(first_peer_device(device));
- drbd_send_current_state(first_peer_device(device));
+ drbd_send_uuids(peer_device);
+ drbd_send_current_state(peer_device);
}
drbd_md_sync(device);
-
+ set_disk_ro(device->vdisk, new_role == R_SECONDARY);
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
mutex_unlock(device->state_mutex);
@@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
* still lock the act_log to not trigger ASSERTs there.
*/
drbd_suspend_io(device);
- buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
+ buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
drbd_resume_io(device);
return DS_ERROR;
@@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
if (la_size_changed || md_moved || rs) {
u32 prev_flags;
+ /* We do some synchronous IO below, which may take some time.
+ * Clear the timer, to avoid scary "timer expired!" messages,
+ * "Superblock" is written out at least twice below, anyways. */
+ del_timer(&device->md_sync_timer);
drbd_al_shrink(device); /* All extents inactive. */
prev_flags = md->flags;
@@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
return 0;
}
-static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
+static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
+ unsigned int max_bio_size)
{
struct request_queue * const q = device->rq_queue;
unsigned int max_hw_sectors = max_bio_size >> 9;
unsigned int max_segments = 0;
struct request_queue *b = NULL;
- if (get_ldev_if_state(device, D_ATTACHING)) {
- b = device->ldev->backing_bdev->bd_disk->queue;
+ if (bdev) {
+ b = bdev->backing_bdev->bd_disk->queue;
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
rcu_read_lock();
@@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
b->backing_dev_info.ra_pages);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
- put_ldev(device);
}
}
-void drbd_reconsider_max_bio_size(struct drbd_device *device)
+void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
{
unsigned int now, new, local, peer;
@@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
- if (get_ldev_if_state(device, D_ATTACHING)) {
- local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+ if (bdev) {
+ local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
device->local_max_bio_size = local;
- put_ldev(device);
}
local = min(local, DRBD_MAX_BIO_SIZE);
@@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
if (new != now)
drbd_info(device, "max BIO size = %u\n", new);
- drbd_setup_queue_param(device, new);
+ drbd_setup_queue_param(device, bdev, new);
}
/* Starts the worker thread */
@@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
+static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
+{
+ return a->disk_barrier != b->disk_barrier ||
+ a->disk_flushes != b->disk_flushes ||
+ a->disk_drain != b->disk_drain;
+}
+
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
@@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
else
set_bit(MD_NO_FUA, &device->flags);
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ if (write_ordering_changed(old_disk_conf, new_disk_conf))
+ drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
drbd_md_sync(device);
@@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
struct drbd_device *device;
+ struct drbd_peer_device *peer_device;
+ struct drbd_connection *connection;
int err;
enum drbd_ret_code retcode;
enum determine_dev_size dd;
@@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device = adm_ctx.device;
mutex_lock(&adm_ctx.resource->adm_mutex);
- conn_reconfig_start(first_peer_device(device)->connection);
+ peer_device = first_peer_device(device);
+ connection = peer_device ? peer_device->connection : NULL;
+ conn_reconfig_start(connection);
/* if you want to reconfigure, please tear down first */
if (device->state.disk > D_DISKLESS) {
@@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
* drbd_ldev_destroy is done already, we may end up here very fast,
* e.g. if someone calls attach from the on-io-error handler,
* to realize a "hot spare" feature (not that I'd recommend that) */
- wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
+ wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
/* make sure there is no leftover from previous force-detach attempts */
clear_bit(FORCE_DETACH, &device->flags);
@@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
goto fail;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
if (nc) {
if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
rcu_read_unlock();
@@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
*/
wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
/* and for any other previously queued work */
- drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
+ drbd_flush_workqueue(&connection->sender_work);
rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
retcode = rv; /* FIXME: Type mismatch. */
@@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
new_disk_conf = NULL;
new_plan = NULL;
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device->read_cnt = 0;
device->writ_cnt = 0;
- drbd_reconsider_max_bio_size(device);
+ drbd_reconsider_max_bio_size(device, device->ldev);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
put_ldev(device);
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
drbd_force_state(device, NS(disk, D_DISKLESS));
drbd_md_sync(device);
fail:
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
if (nbc) {
if (nbc->backing_bdev)
blkdev_put(nbc->backing_bdev,
@@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force)
}
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
+ drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
retcode = drbd_request_state(device, NS(disk, D_FAILED));
drbd_md_put_buffer(device);
/* D_FAILED will transition to DISKLESS. */
@@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
@@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2704,7 +2727,7 @@ out:
return 0;
}
-static int drbd_bmio_set_susp_al(struct drbd_device *device)
+static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
{
int rv;
@@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
@@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc
return list_first_entry(&resource->connections, struct drbd_connection, connections);
}
-int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
+static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
const struct sib_info *sib)
{
struct drbd_resource *resource = device->resource;
@@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
unsigned seq;
int err = -ENOMEM;
- if (sib->sib_reason == SIB_SYNC_PROGRESS) {
- if (time_after(jiffies, device->rs_last_bcast + HZ))
- device->rs_last_bcast = jiffies;
- else
- return;
- }
-
seq = atomic_inc_return(&drbd_genl_seq);
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
if (!msg)
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 89736bdbbc7..06e6147c760 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
seq_printf(seq, "%ld", v);
}
+static void drbd_get_syncer_progress(struct drbd_device *device,
+ union drbd_dev_state state, unsigned long *rs_total,
+ unsigned long *bits_left, unsigned int *per_mil_done)
+{
+ /* this is to break it at compile time when we change that, in case we
+ * want to support more than (1<<32) bits on a 32bit arch. */
+ typecheck(unsigned long, device->rs_total);
+ *rs_total = device->rs_total;
+
+ /* note: both rs_total and rs_left are in bits, i.e. in
+ * units of BM_BLOCK_SIZE.
+ * for the percentage, we don't care. */
+
+ if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
+ *bits_left = device->ov_left;
+ else
+ *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
+ /* >> 10 to prevent overflow,
+ * +1 to prevent division by zero */
+ if (*bits_left > *rs_total) {
+ /* D'oh. Maybe a logic bug somewhere. More likely just a race
+ * between state change and reset of rs_total.
+ */
+ *bits_left = *rs_total;
+ *per_mil_done = *rs_total ? 0 : 1000;
+ } else {
+ /* Make sure the division happens in long context.
+ * We allow up to one petabyte storage right now,
+ * at a granularity of 4k per bit that is 2**38 bits.
+ * After shift right and multiplication by 1000,
+ * this should still fit easily into a 32bit long,
+ * so we don't need a 64bit division on 32bit arch.
+ * Note: currently we don't support such large bitmaps on 32bit
+ * arch anyways, but no harm done to be prepared for it here.
+ */
+ unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
+ unsigned long left = *bits_left >> shift;
+ unsigned long total = 1UL + (*rs_total >> shift);
+ unsigned long tmp = 1000UL - left * 1000UL/total;
+ *per_mil_done = tmp;
+ }
+}
+
+
/*lge
* progress bars shamelessly adapted from driver/md/md.c
* output looks like
* [=====>..............] 33.5% (23456/123456)
* finish: 2:20:20 speed: 6,345 (6,456) K/sec
*/
-static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq)
+static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
+ union drbd_dev_state state)
{
- unsigned long db, dt, dbdt, rt, rs_left;
+ unsigned long db, dt, dbdt, rt, rs_total, rs_left;
unsigned int res;
int i, x, y;
int stalled = 0;
- drbd_get_syncer_progress(device, &rs_left, &res);
+ drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
x = res/50;
y = 20-x;
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_printf(seq, ".");
seq_printf(seq, "] ");
- if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
+ if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
seq_printf(seq, "verified:");
else
seq_printf(seq, "sync'ed:");
seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
/* if more than a few GB, display in MB */
- if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
+ if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
seq_printf(seq, "(%lu/%lu)M",
(unsigned long) Bit2KB(rs_left >> 10),
- (unsigned long) Bit2KB(device->rs_total >> 10));
+ (unsigned long) Bit2KB(rs_total >> 10));
else
seq_printf(seq, "(%lu/%lu)K\n\t",
(unsigned long) Bit2KB(rs_left),
- (unsigned long) Bit2KB(device->rs_total));
+ (unsigned long) Bit2KB(rs_total));
/* see drivers/md/md.c
* We do not want to overflow, so the order of operands and
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
if (dt == 0)
dt = 1;
- db = device->rs_total - rs_left;
+ db = rs_total - rs_left;
dbdt = Bit2KB(db/dt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, ")");
- if (device->state.conn == C_SYNC_TARGET ||
- device->state.conn == C_VERIFY_S) {
+ if (state.conn == C_SYNC_TARGET ||
+ state.conn == C_VERIFY_S) {
seq_printf(seq, " want: ");
seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
}
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
unsigned long bm_bits = drbd_bm_bits(device);
unsigned long bit_pos;
unsigned long long stop_sector = 0;
- if (device->state.conn == C_VERIFY_S ||
- device->state.conn == C_VERIFY_T) {
+ if (state.conn == C_VERIFY_S ||
+ state.conn == C_VERIFY_T) {
bit_pos = bm_bits - device->ov_left;
if (verify_can_do_stop_sector(device))
stop_sector = device->ov_stop_sector;
@@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
}
-static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
-{
- struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
-
- seq_printf(seq, "%5d %s %s\n", bme->rs_left,
- bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
- bme->flags & BME_LOCKED ? "LOCKED" : "------"
- );
-}
-
static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, prev_i = -1;
const char *sn;
struct drbd_device *device;
struct net_conf *nc;
+ union drbd_dev_state state;
char wp;
static char write_ordering_chars[] = {
@@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
prev_i = i;
- sn = drbd_conn_str(device->state.conn);
+ state = device->state;
+ sn = drbd_conn_str(state.conn);
- if (device->state.conn == C_STANDALONE &&
- device->state.disk == D_DISKLESS &&
- device->state.role == R_SECONDARY) {
+ if (state.conn == C_STANDALONE &&
+ state.disk == D_DISKLESS &&
+ state.role == R_SECONDARY) {
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
/* reset device->congestion_reason */
@@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
"lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
i, sn,
- drbd_role_str(device->state.role),
- drbd_role_str(device->state.peer),
- drbd_disk_str(device->state.disk),
- drbd_disk_str(device->state.pdsk),
+ drbd_role_str(state.role),
+ drbd_role_str(state.peer),
+ drbd_disk_str(state.disk),
+ drbd_disk_str(state.pdsk),
wp,
drbd_suspended(device) ? 's' : 'r',
- device->state.aftr_isp ? 'a' : '-',
- device->state.peer_isp ? 'p' : '-',
- device->state.user_isp ? 'u' : '-',
+ state.aftr_isp ? 'a' : '-',
+ state.peer_isp ? 'p' : '-',
+ state.user_isp ? 'u' : '-',
device->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
device->send_cnt/2,
@@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
atomic_read(&device->unacked_cnt),
atomic_read(&device->ap_bio_cnt),
first_peer_device(device)->connection->epochs,
- write_ordering_chars[first_peer_device(device)->connection->write_ordering]
+ write_ordering_chars[device->resource->write_ordering]
);
seq_printf(seq, " oos:%llu\n",
Bit2KB((unsigned long long)
drbd_bm_total_weight(device)));
}
- if (device->state.conn == C_SYNC_SOURCE ||
- device->state.conn == C_SYNC_TARGET ||
- device->state.conn == C_VERIFY_S ||
- device->state.conn == C_VERIFY_T)
- drbd_syncer_progress(device, seq);
+ if (state.conn == C_SYNC_SOURCE ||
+ state.conn == C_SYNC_TARGET ||
+ state.conn == C_VERIFY_S ||
+ state.conn == C_VERIFY_T)
+ drbd_syncer_progress(device, seq, state);
if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
@@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
put_ldev(device);
}
- if (proc_details >= 2) {
- if (device->resync) {
- lc_seq_dump_details(seq, device->resync, "rs_left",
- resync_dump_detail);
- }
- }
+ if (proc_details >= 2)
+ seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
}
rcu_read_unlock();
@@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file)
int err;
if (try_module_get(THIS_MODULE)) {
- err = single_open(file, drbd_seq_show, PDE_DATA(inode));
+ err = single_open(file, drbd_seq_show, NULL);
if (err)
module_put(THIS_MODULE);
return err;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5b17ec88ea0..9342b8da73a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
goto fail;
}
+ memset(peer_req, 0, sizeof(*peer_req));
+ INIT_LIST_HEAD(&peer_req->w.list);
drbd_clear_interval(&peer_req->i);
peer_req->i.size = data_size;
peer_req->i.sector = sector;
- peer_req->i.local = false;
- peer_req->i.waiting = false;
-
- peer_req->epoch = NULL;
+ peer_req->submit_jif = jiffies;
peer_req->peer_device = peer_device;
peer_req->pages = page;
- atomic_set(&peer_req->pending_bios, 0);
- peer_req->flags = 0;
/*
* The block_id is opaque to the receiver. It is not endianness
* converted, and sent back to the sender unchanged.
@@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
int is_net)
{
+ might_sleep();
if (peer_req->flags & EE_HAS_DIGEST)
kfree(peer_req->digest);
drbd_free_pages(device, peer_req->pages, is_net);
D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
+ if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+ drbd_al_complete_io(device, &peer_req->i);
+ }
mempool_free(peer_req, drbd_ee_mempool);
}
@@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
{
unsigned int header_size = drbd_header_size(connection);
struct packet_info pi;
+ struct net_conf *nc;
int err;
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ if (!nc) {
+ rcu_read_unlock();
+ return -EIO;
+ }
+ sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
+ rcu_read_unlock();
+
err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
if (err != header_size) {
if (err >= 0)
@@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
* drbd_socket_okay() - Free the socket if its connection is not okay
* @sock: pointer to the pointer to the socket.
*/
-static int drbd_socket_okay(struct socket **sock)
+static bool drbd_socket_okay(struct socket **sock)
{
int rr;
char tb[4];
@@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock)
return false;
}
}
+
+static bool connection_established(struct drbd_connection *connection,
+ struct socket **sock1,
+ struct socket **sock2)
+{
+ struct net_conf *nc;
+ int timeout;
+ bool ok;
+
+ if (!*sock1 || !*sock2)
+ return false;
+
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
+ rcu_read_unlock();
+ schedule_timeout_interruptible(timeout);
+
+ ok = drbd_socket_okay(sock1);
+ ok = drbd_socket_okay(sock2) && ok;
+
+ return ok;
+}
+
/* Gets called if a connection is established, or if a new minor gets created
in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
@@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection)
struct drbd_socket sock, msock;
struct drbd_peer_device *peer_device;
struct net_conf *nc;
- int vnr, timeout, h, ok;
- bool discard_my_data;
+ int vnr, timeout, h;
+ bool discard_my_data, ok;
enum drbd_state_rv rv;
struct accept_wait_data ad = {
.connection = connection,
@@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection)
}
}
- if (sock.socket && msock.socket) {
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- timeout = nc->ping_timeo * HZ / 10;
- rcu_read_unlock();
- schedule_timeout_interruptible(timeout);
- ok = drbd_socket_okay(&sock.socket);
- ok = drbd_socket_okay(&msock.socket) && ok;
- if (ok)
- break;
- }
+ if (connection_established(connection, &sock.socket, &msock.socket))
+ break;
retry:
s = drbd_wait_for_connect(connection, &ad);
@@ -969,8 +996,7 @@ randomize:
goto out_release_sockets;
}
- ok = drbd_socket_okay(&sock.socket);
- ok = drbd_socket_okay(&msock.socket) && ok;
+ ok = connection_established(connection, &sock.socket, &msock.socket);
} while (!ok);
if (ad.s_listen)
@@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection)
struct drbd_peer_device *peer_device;
int vnr;
- if (connection->write_ordering >= WO_bdev_flush) {
+ if (connection->resource->write_ordering >= WO_bdev_flush) {
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
@@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection)
kref_get(&device->kref);
rcu_read_unlock();
+ /* Right now, we have only this one synchronous code path
+ * for flushes between request epochs.
+ * We may want to make those asynchronous,
+ * or at least parallelize the flushes to the volume devices.
+ */
+ device->flush_jif = jiffies;
+ set_bit(FLUSH_PENDING, &device->flags);
rv = blkdev_issue_flush(device->ldev->backing_bdev,
GFP_NOIO, NULL);
+ clear_bit(FLUSH_PENDING, &device->flags);
if (rv) {
drbd_info(device, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */
- drbd_bump_write_ordering(connection, WO_drain_io);
+ drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
}
put_ldev(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
return rv;
}
+static enum write_ordering_e
+max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
+{
+ struct disk_conf *dc;
+
+ dc = rcu_dereference(bdev->disk_conf);
+
+ if (wo == WO_bdev_flush && !dc->disk_flushes)
+ wo = WO_drain_io;
+ if (wo == WO_drain_io && !dc->disk_drain)
+ wo = WO_none;
+
+ return wo;
+}
+
/**
* drbd_bump_write_ordering() - Fall back to an other write ordering method
* @connection: DRBD connection.
* @wo: Write ordering method to try.
*/
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+ enum write_ordering_e wo)
{
- struct disk_conf *dc;
- struct drbd_peer_device *peer_device;
+ struct drbd_device *device;
enum write_ordering_e pwo;
int vnr;
static char *write_ordering_str[] = {
@@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord
[WO_bdev_flush] = "flush",
};
- pwo = connection->write_ordering;
- wo = min(pwo, wo);
+ pwo = resource->write_ordering;
+ if (wo != WO_bdev_flush)
+ wo = min(pwo, wo);
rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
+ idr_for_each_entry(&resource->devices, device, vnr) {
+ if (get_ldev(device)) {
+ wo = max_allowed_wo(device->ldev, wo);
+ if (device->ldev == bdev)
+ bdev = NULL;
+ put_ldev(device);
+ }
+ }
- if (!get_ldev_if_state(device, D_ATTACHING))
- continue;
- dc = rcu_dereference(device->ldev->disk_conf);
+ if (bdev)
+ wo = max_allowed_wo(bdev, wo);
- if (wo == WO_bdev_flush && !dc->disk_flushes)
- wo = WO_drain_io;
- if (wo == WO_drain_io && !dc->disk_drain)
- wo = WO_none;
- put_ldev(device);
- }
rcu_read_unlock();
- connection->write_ordering = wo;
- if (pwo != connection->write_ordering || wo == WO_bdev_flush)
- drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
+
+ resource->write_ordering = wo;
+ if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+ drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
/**
@@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device,
/* wait for all pending IO completions, before we start
* zeroing things out. */
conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ /* add it to the active list now,
+ * so we can find it to present it in debugfs */
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_SUBMITTED;
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&peer_req->w.list, &device->active_ee);
+ spin_unlock_irq(&device->resource->req_lock);
if (blkdev_issue_zeroout(device->ldev->backing_bdev,
sector, ds >> 9, GFP_NOIO))
peer_req->flags |= EE_WAS_ERROR;
@@ -1398,6 +1455,9 @@ submit:
D_ASSERT(device, page == NULL);
atomic_set(&peer_req->pending_bios, n_bios);
+ /* for debugfs: update timestamp, mark as submitted */
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_SUBMITTED;
do {
bio = bios;
bios = bios->bi_next;
@@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
* R_PRIMARY crashes now.
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
- switch (connection->write_ordering) {
+ switch (connection->resource->write_ordering) {
case WO_none:
if (rv == FE_RECYCLED)
return 0;
@@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
return 0;
default:
- drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
+ drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
+ connection->resource->write_ordering);
return -EIO;
}
@@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
struct drbd_peer_request *peer_req;
struct page *page;
int dgs, ds, err;
- int data_size = pi->size;
+ unsigned int data_size = pi->size;
void *dig_in = peer_device->connection->int_dig_in;
void *dig_vv = peer_device->connection->int_dig_vv;
unsigned long *data;
@@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
if (!peer_req)
return NULL;
+ peer_req->flags |= EE_WRITE;
if (trim)
return peer_req;
@@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
* respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block;
+ peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock);
- list_add(&peer_req->w.list, &device->sync_ee);
+ list_add_tail(&peer_req->w.list, &device->sync_ee);
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
@@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
}
dec_unacked(device);
}
+
/* we delete from the conflict detection hash _after_ we sent out the
* P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device,
drbd_for_each_overlap(i, &device->write_requests, sector, size) {
if (i == &peer_req->i)
continue;
+ if (i->completed)
+ continue;
if (!i->local) {
/*
@@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device,
(unsigned long long)sector, size,
superseded ? "local" : "remote");
- inc_unacked(device);
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
@@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
{
struct drbd_peer_device *peer_device;
struct drbd_device *device;
+ struct net_conf *nc;
sector_t sector;
struct drbd_peer_request *peer_req;
struct p_data *p = pi->data;
@@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
}
peer_req->w.cb = e_end_block;
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(dp_flags);
@@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
spin_unlock(&connection->epoch_lock);
rcu_read_lock();
- tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
+ nc = rcu_dereference(peer_device->connection->net_conf);
+ tp = nc->two_primaries;
+ if (peer_device->connection->agreed_pro_version < 100) {
+ switch (nc->wire_protocol) {
+ case DRBD_PROT_C:
+ dp_flags |= DP_SEND_WRITE_ACK;
+ break;
+ case DRBD_PROT_B:
+ dp_flags |= DP_SEND_RECEIVE_ACK;
+ break;
+ }
+ }
rcu_read_unlock();
+
+ if (dp_flags & DP_SEND_WRITE_ACK) {
+ peer_req->flags |= EE_SEND_WRITE_ACK;
+ inc_unacked(device);
+ /* corresponding dec_unacked() in e_end_block()
+ * respective _drbd_clear_done_ee */
+ }
+
+ if (dp_flags & DP_SEND_RECEIVE_ACK) {
+ /* I really don't like it that the receiver thread
+ * sends on the msock, but anyways */
+ drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+ }
+
if (tp) {
+ /* two primaries implies protocol C */
+ D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
peer_req->flags |= EE_IN_INTERVAL_TREE;
err = wait_for_and_update_peer_seq(peer_device, peer_seq);
if (err)
@@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
* active_ee to become empty in drbd_submit_peer_request();
* better not add ourselves here. */
if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
- list_add(&peer_req->w.list, &device->active_ee);
+ list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
if (device->state.conn == C_SYNC_TARGET)
wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
- if (peer_device->connection->agreed_pro_version < 100) {
- rcu_read_lock();
- switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
- case DRBD_PROT_C:
- dp_flags |= DP_SEND_WRITE_ACK;
- break;
- case DRBD_PROT_B:
- dp_flags |= DP_SEND_RECEIVE_ACK;
- break;
- }
- rcu_read_unlock();
- }
-
- if (dp_flags & DP_SEND_WRITE_ACK) {
- peer_req->flags |= EE_SEND_WRITE_ACK;
- inc_unacked(device);
- /* corresponding dec_unacked() in e_end_block()
- * respective _drbd_clear_done_ee */
- }
-
- if (dp_flags & DP_SEND_RECEIVE_ACK) {
- /* I really don't like it that the receiver thread
- * sends on the msock, but anyways */
- drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
- }
-
if (device->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
- peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
- drbd_al_begin_io(device, &peer_req->i, true);
+ drbd_al_begin_io(device, &peer_req->i);
+ peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
}
err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
@@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
list_del(&peer_req->w.list);
drbd_remove_epoch_entry_interval(device, peer_req);
spin_unlock_irq(&device->resource->req_lock);
- if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
+ if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
drbd_al_complete_io(device, &peer_req->i);
+ }
out_interrupted:
drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
@@ -2368,13 +2439,14 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+ bool throttle_if_app_is_waiting)
{
struct lc_element *tmp;
- bool throttle = true;
+ bool throttle = drbd_rs_c_min_rate_throttle(device);
- if (!drbd_rs_c_min_rate_throttle(device))
- return false;
+ if (!throttle || throttle_if_app_is_waiting)
+ return throttle;
spin_lock_irq(&device->al_lock);
tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_PRIORITY, &bm_ext->flags))
throttle = false;
- /* Do not slow down if app IO is already waiting for this extent */
+ /* Do not slow down if app IO is already waiting for this extent,
+ * and our progress is necessary for application IO to complete. */
}
spin_unlock_irq(&device->al_lock);
@@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
(int)part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&device->rs_sect_ev);
- if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
+
+ if (atomic_read(&device->ap_actlog_cnt)
+ || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
unsigned long rs_left;
int i;
@@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
peer_req->w.cb = w_e_end_data_req;
fault_type = DRBD_FAULT_DT_RD;
/* application IO, don't drbd_rs_begin_io */
+ peer_req->flags |= EE_APPLICATION;
goto submit;
case P_RS_DATA_REQUEST:
@@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
peer_req->w.cb = w_e_end_csum_rs_req;
/* used in the sector offset progress display */
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ /* remember to report stats in drbd_resync_finished */
+ device->use_csums = true;
} else if (pi->cmd == P_OV_REPLY) {
/* track progress, we may need to throttle */
atomic_add(size >> 9, &device->rs_sect_in);
@@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
* we would also throttle its application reads.
* In that case, throttling is done on the SyncTarget only.
*/
- if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
+
+ /* Even though this may be a resync request, we do add to "read_ee";
+ * "sync_ee" is only used for resync WRITEs.
+ * Add to list early, so debugfs can find this request
+ * even if we have to sleep below. */
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&peer_req->w.list, &device->read_ee);
+ spin_unlock_irq(&device->resource->req_lock);
+
+ update_receiver_timing_details(connection, drbd_rs_should_slow_down);
+ if (device->state.peer != R_PRIMARY
+ && drbd_rs_should_slow_down(device, sector, false))
schedule_timeout_uninterruptible(HZ/10);
+ update_receiver_timing_details(connection, drbd_rs_begin_io);
if (drbd_rs_begin_io(device, sector))
goto out_free_e;
@@ -2604,22 +2694,20 @@ submit_for_resync:
atomic_add(size >> 9, &device->rs_sect_ev);
submit:
+ update_receiver_timing_details(connection, drbd_submit_peer_request);
inc_unacked(device);
- spin_lock_irq(&device->resource->req_lock);
- list_add_tail(&peer_req->w.list, &device->read_ee);
- spin_unlock_irq(&device->resource->req_lock);
-
if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
return 0;
/* don't care for the reason here */
drbd_err(device, "submit failed, triggering re-connect\n");
+
+out_free_e:
spin_lock_irq(&device->resource->req_lock);
list_del(&peer_req->w.list);
spin_unlock_irq(&device->resource->req_lock);
/* no drbd_rs_complete_io(), we are dropping the connection anyways */
-out_free_e:
put_ldev(device);
drbd_free_peer_req(device, peer_req);
return -EIO;
@@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
-1091 requires proto 91
-1096 requires proto 96
*/
-static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
+static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
u64 self, peer;
int i, j;
@@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
@@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
case 1: /* self_pri && !peer_pri */ return 1;
case 2: /* !self_pri && peer_pri */ return -1;
case 3: /* self_pri && peer_pri */
- dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
+ dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
return dc ? -1 : 1;
}
}
@@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
*rule_nr = 51;
peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
if (self == peer) {
- if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+ if (connection->agreed_pro_version < 96 ?
(device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
(device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
/* The last P_SYNC_UUID did not get though. Undo the last start of
resync as sync source modifications of the peer's UUIDs. */
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
@@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
*rule_nr = 71;
self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
if (self == peer) {
- if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+ if (connection->agreed_pro_version < 96 ?
(device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
(device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
/* The last P_SYNC_UUID did not get though. Undo the last start of
resync as sync source modifications of our UUIDs. */
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
@@ -3352,8 +3442,7 @@ disconnect:
* return: NULL (alg name was "")
* ERR_PTR(error) if something goes wrong
* or the crypto hash ptr, if it worked out ok. */
-static
-struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
+static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
const char *alg, const char *name)
{
struct crypto_hash *tfm;
@@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
struct drbd_device *device;
struct p_sizes *p = pi->data;
enum determine_dev_size dd = DS_UNCHANGED;
- sector_t p_size, p_usize, my_usize;
+ sector_t p_size, p_usize, p_csize, my_usize;
int ldsc = 0; /* local disk size changed */
enum dds_flags ddsf;
@@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
p_size = be64_to_cpu(p->d_size);
p_usize = be64_to_cpu(p->u_size);
+ p_csize = be64_to_cpu(p->c_size);
/* just store the peer's disk size for now.
* we still need to figure out whether we accept that. */
@@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
}
device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
- drbd_reconsider_max_bio_size(device);
/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
In case we cleared the QUEUE_FLAG_DISCARD from our queue in
drbd_reconsider_max_bio_size(), we can be sure that after
@@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(device)) {
+ drbd_reconsider_max_bio_size(device, device->ldev);
dd = drbd_determine_dev_size(device, ddsf, NULL);
put_ldev(device);
if (dd == DS_ERROR)
return -EIO;
drbd_md_sync(device);
} else {
- /* I am diskless, need to accept the peer's size. */
- drbd_set_my_capacity(device, p_size);
+ /*
+ * I am diskless, need to accept the peer's *current* size.
+ * I must NOT accept the peers backing disk size,
+ * it may have been larger than mine all along...
+ *
+ * At this point, the peer knows more about my disk, or at
+ * least about what we last agreed upon, than myself.
+ * So if his c_size is less than his d_size, the most likely
+ * reason is that *my* d_size was smaller last time we checked.
+ *
+ * However, if he sends a zero current size,
+ * take his (user-capped or) backing disk size anyways.
+ */
+ drbd_reconsider_max_bio_size(device, NULL);
+ drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
}
if (get_ldev(device)) {
@@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection)
struct data_cmd *cmd;
drbd_thread_current_set_cpu(&connection->receiver);
+ update_receiver_timing_details(connection, drbd_recv_header);
if (drbd_recv_header(connection, &pi))
goto err_out;
@@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection)
}
if (shs) {
+ update_receiver_timing_details(connection, drbd_recv_all_warn);
err = drbd_recv_all_warn(connection, pi.data, shs);
if (err)
goto err_out;
pi.size -= shs;
}
+ update_receiver_timing_details(connection, cmd->fn);
err = cmd->fn(connection, &pi);
if (err) {
drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 09803d0d520..c67717d572d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
{
int rw = bio_data_dir(req->master_bio);
- unsigned long duration = jiffies - req->start_time;
+ unsigned long duration = jiffies - req->start_jif;
int cpu;
cpu = part_stat_lock();
part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration);
@@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
{
struct drbd_request *req;
- req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+ req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO);
if (!req)
return NULL;
@@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
+ INIT_LIST_HEAD(&req->req_pending_master_completion);
+ INIT_LIST_HEAD(&req->req_pending_local);
/* one reference to be put by __drbd_make_request */
atomic_set(&req->completion_ref, 1);
@@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
return req;
}
+static void drbd_remove_request_interval(struct rb_root *root,
+ struct drbd_request *req)
+{
+ struct drbd_device *device = req->device;
+ struct drbd_interval *i = &req->i;
+
+ drbd_remove_interval(root, i);
+
+ /* Wake up any processes waiting for this request to complete. */
+ if (i->waiting)
+ wake_up(&device->misc_wait);
+}
+
void drbd_req_destroy(struct kref *kref)
{
struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref)
return;
}
- /* remove it from the transfer log.
- * well, only if it had been there in the first
- * place... if it had not (local only or conflicting
- * and never sent), it should still be "empty" as
- * initialized in drbd_req_new(), so we can list_del() it
- * here unconditionally */
+ /* If called from mod_rq_state (expected normal case) or
+ * drbd_send_and_submit (the less likely normal path), this holds the
+ * req_lock, and req->tl_requests will typicaly be on ->transfer_log,
+ * though it may be still empty (never added to the transfer log).
+ *
+ * If called from do_retry(), we do NOT hold the req_lock, but we are
+ * still allowed to unconditionally list_del(&req->tl_requests),
+ * because it will be on a local on-stack list only. */
list_del_init(&req->tl_requests);
+ /* finally remove the request from the conflict detection
+ * respective block_id verification interval tree. */
+ if (!drbd_interval_empty(&req->i)) {
+ struct rb_root *root;
+
+ if (s & RQ_WRITE)
+ root = &device->write_requests;
+ else
+ root = &device->read_requests;
+ drbd_remove_request_interval(root, req);
+ } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
+ drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
+ s, (unsigned long long)req->i.sector, req->i.size);
+
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
* release the reference to the activity log. */
@@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device,
}
-static void drbd_remove_request_interval(struct rb_root *root,
- struct drbd_request *req)
-{
- struct drbd_device *device = req->device;
- struct drbd_interval *i = &req->i;
-
- drbd_remove_interval(root, i);
-
- /* Wake up any processes waiting for this request to complete. */
- if (i->waiting)
- wake_up(&device->misc_wait);
-}
-
/* Helper for __req_mod().
* Set m->bio to the master bio, if it is fit to be completed,
* or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
error = PTR_ERR(req->private_bio);
- /* remove the request from the conflict detection
- * respective block_id verification hash */
- if (!drbd_interval_empty(&req->i)) {
- struct rb_root *root;
-
- if (rw == WRITE)
- root = &device->write_requests;
- else
- root = &device->read_requests;
- drbd_remove_request_interval(root, req);
- }
-
/* Before we can signal completion to the upper layers,
* we may need to close the current transfer log epoch.
* We are within the request lock, so we can simply compare
@@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
m->error = ok ? 0 : (error ?: -EIO);
m->bio = req->master_bio;
req->master_bio = NULL;
+ /* We leave it in the tree, to be able to verify later
+ * write-acks in protocol != C during resync.
+ * But we mark it as "complete", so it won't be counted as
+ * conflict in a multi-primary setup. */
+ req->i.completed = true;
}
+
+ if (req->i.waiting)
+ wake_up(&device->misc_wait);
+
+ /* Either we are about to complete to upper layers,
+ * or we will restart this request.
+ * In either case, the request object will be destroyed soon,
+ * so better remove it from all lists. */
+ list_del_init(&req->req_pending_master_completion);
}
+/* still holds resource->req_lock */
static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
{
struct drbd_device *device = req->device;
@@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
return 1;
}
+static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_next == NULL)
+ connection->req_next = req;
+}
+
+static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_next != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if (s & RQ_NET_QUEUED)
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_next = req;
+}
+
+static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_ack_pending == NULL)
+ connection->req_ack_pending = req;
+}
+
+static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_ack_pending != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_ack_pending = req;
+}
+
+static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_not_net_done == NULL)
+ connection->req_not_net_done = req;
+}
+
+static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_not_net_done != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_not_net_done = req;
+}
+
/* I'd like this to be the only place that manipulates
* req->completion_ref and req->kref. */
static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
int clear, int set)
{
struct drbd_device *device = req->device;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
unsigned s = req->rq_state;
int c_put = 0;
int k_put = 0;
@@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
atomic_inc(&req->completion_ref);
}
- if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED))
+ if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
atomic_inc(&req->completion_ref);
+ set_if_null_req_next(peer_device, req);
+ }
if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
kref_get(&req->kref); /* wait for the DONE */
- if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT))
- atomic_add(req->i.size >> 9, &device->ap_in_flight);
+ if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
+ /* potentially already completed in the asender thread */
+ if (!(s & RQ_NET_DONE)) {
+ atomic_add(req->i.size >> 9, &device->ap_in_flight);
+ set_if_null_req_not_net_done(peer_device, req);
+ }
+ if (s & RQ_NET_PENDING)
+ set_if_null_req_ack_pending(peer_device, req);
+ }
if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
atomic_inc(&req->completion_ref);
@@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
++k_put;
else
++c_put;
+ list_del_init(&req->req_pending_local);
}
if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
dec_ap_pending(device);
++c_put;
+ req->acked_jif = jiffies;
+ advance_conn_req_ack_pending(peer_device, req);
}
- if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+ if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
++c_put;
+ advance_conn_req_next(peer_device, req);
+ }
- if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
- if (req->rq_state & RQ_NET_SENT)
+ if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
+ if (s & RQ_NET_SENT)
atomic_sub(req->i.size >> 9, &device->ap_in_flight);
- ++k_put;
+ if (s & RQ_EXP_BARR_ACK)
+ ++k_put;
+ req->net_done_jif = jiffies;
+
+ /* in ahead/behind mode, or just in case,
+ * before we finally destroy this request,
+ * the caching pointers must not reference it anymore */
+ advance_conn_req_next(peer_device, req);
+ advance_conn_req_ack_pending(peer_device, req);
+ advance_conn_req_not_net_done(peer_device, req);
}
/* potentially complete and destroy */
@@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
bdevname(device->ldev->backing_bdev, b));
}
+/* Helper for HANDED_OVER_TO_NETWORK.
+ * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
+ * Is it also still "PENDING"?
+ * --> If so, clear PENDING and set NET_OK below.
+ * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
+ * (and we must not set RQ_NET_OK) */
+static inline bool is_pending_write_protocol_A(struct drbd_request *req)
+{
+ return (req->rq_state &
+ (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
+ == (RQ_WRITE|RQ_NET_PENDING);
+}
+
/* obviously this could be coded as many single functions
* instead of one huge switch,
* or by putting the code directly in the respective locations
@@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m)
{
- struct drbd_device *device = req->device;
+ struct drbd_device *const device = req->device;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
struct net_conf *nc;
int p, rv = 0;
@@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
* and from w_read_retry_remote */
D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
p = nc->wire_protocol;
rcu_read_unlock();
req->rq_state |=
@@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
mod_rq_state(req, m, 0, RQ_NET_QUEUED);
req->w.cb = w_send_read_req;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
req->w.cb = w_send_dblock;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
/* close the epoch, in case it outgrew the limit */
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
p = nc->max_epoch_size;
rcu_read_unlock();
- if (first_peer_device(device)->connection->current_tle_writes >= p)
- start_new_tl_epoch(first_peer_device(device)->connection);
+ if (connection->current_tle_writes >= p)
+ start_new_tl_epoch(connection);
break;
case QUEUE_FOR_SEND_OOS:
mod_rq_state(req, m, 0, RQ_NET_QUEUED);
req->w.cb = w_send_out_of_sync;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case HANDED_OVER_TO_NETWORK:
/* assert something? */
- if (bio_data_dir(req->master_bio) == WRITE &&
- !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
+ if (is_pending_write_protocol_A(req))
/* this is what is dangerous about protocol A:
* pretend it was successfully written on the peer. */
- if (req->rq_state & RQ_NET_PENDING)
- mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
- /* else: neg-ack was faster... */
- /* it is still not yet RQ_NET_DONE until the
- * corresponding epoch barrier got acked as well,
- * so we know what to dirty on connection loss */
- }
- mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+ mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
+ RQ_NET_SENT|RQ_NET_OK);
+ else
+ mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+ /* It is still not yet RQ_NET_DONE until the
+ * corresponding epoch barrier got acked as well,
+ * so we know what to dirty on connection loss. */
break;
case OOS_HANDED_TO_NETWORK:
@@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case WRITE_ACKED_BY_PEER_AND_SIS:
req->rq_state |= RQ_NET_SIS;
case WRITE_ACKED_BY_PEER:
- D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
- /* protocol C; successfully written on peer.
+ /* Normal operation protocol C: successfully written on peer.
+ * During resync, even in protocol != C,
+ * we requested an explicit write ack anyways.
+ * Which means we cannot even assert anything here.
* Nothing more to do here.
* We want to keep the tl in place for all protocols, to cater
* for volatile write-back caches on lower level devices. */
-
goto ack_common;
case RECV_ACKED_BY_PEER:
D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
* see also notes above in HANDED_OVER_TO_NETWORK about
* protocol != C */
ack_common:
- D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
break;
@@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
get_ldev(device); /* always succeeds in this call path */
req->w.cb = w_restart_disk_io;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
if (req->w.cb) {
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ /* w.cb expected to be w_send_dblock, or w_send_read_req */
+ drbd_queue_work(&connection->sender_work,
&req->w);
rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
} /* else: FIXME can this happen? */
@@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case QUEUE_AS_DRBD_BARRIER:
- start_new_tl_epoch(first_peer_device(device)->connection);
+ start_new_tl_epoch(connection);
mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
break;
};
@@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device)
connection->agreed_pro_version < 96)
return;
+ if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
+ return; /* nothing to do ... */
+
/* If I don't even have good local storage, we can not reasonably try
* to pull ahead of the peer. We also need the local reference to make
* sure device->act_log is there.
@@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(device)) {
+ req->pre_submit_jif = jiffies;
if (drbd_insert_fault(device,
rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
@@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req)
static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
{
- spin_lock(&device->submit.lock);
+ spin_lock_irq(&device->resource->req_lock);
list_add_tail(&req->tl_requests, &device->submit.writes);
- spin_unlock(&device->submit.lock);
+ list_add_tail(&req->req_pending_master_completion,
+ &device->pending_master_completion[1 /* WRITE */]);
+ spin_unlock_irq(&device->resource->req_lock);
queue_work(device->submit.wq, &device->submit.worker);
+ /* do_submit() may sleep internally on al_wait, too */
+ wake_up(&device->al_wait);
}
/* returns the new drbd_request pointer, if the caller is expected to
@@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
* Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
*/
static struct drbd_request *
-drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
const int rw = bio_data_dir(bio);
struct drbd_request *req;
@@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
bio_endio(bio, -ENOMEM);
return ERR_PTR(-ENOMEM);
}
- req->start_time = start_time;
+ req->start_jif = start_jif;
if (!get_ldev(device)) {
bio_put(req->private_bio);
@@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
if (rw == WRITE && req->private_bio && req->i.size
&& !test_bit(AL_SUSPENDED, &device->flags)) {
if (!drbd_al_begin_io_fastpath(device, &req->i)) {
+ atomic_inc(&device->ap_actlog_cnt);
drbd_queue_write(device, req);
return NULL;
}
req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
}
return req;
@@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
+ struct drbd_resource *resource = device->resource;
const int rw = bio_rw(req->master_bio);
struct bio_and_error m = { NULL, };
bool no_remote = false;
+ bool submit_private_bio = false;
- spin_lock_irq(&device->resource->req_lock);
+ spin_lock_irq(&resource->req_lock);
if (rw == WRITE) {
/* This may temporarily give up the req_lock,
* but will re-aquire it before it returns here.
@@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
no_remote = true;
}
+ /* If it took the fast path in drbd_request_prepare, add it here.
+ * The slow path has added it already. */
+ if (list_empty(&req->req_pending_master_completion))
+ list_add_tail(&req->req_pending_master_completion,
+ &device->pending_master_completion[rw == WRITE]);
if (req->private_bio) {
/* needs to be marked within the same spinlock */
+ list_add_tail(&req->req_pending_local,
+ &device->pending_completion[rw == WRITE]);
_req_mod(req, TO_BE_SUBMITTED);
/* but we need to give up the spinlock to submit */
- spin_unlock_irq(&device->resource->req_lock);
- drbd_submit_req_private_bio(req);
- spin_lock_irq(&device->resource->req_lock);
+ submit_private_bio = true;
} else if (no_remote) {
nodata:
if (__ratelimit(&drbd_ratelimit_state))
@@ -1167,15 +1321,23 @@ nodata:
out:
if (drbd_req_put_completion_ref(req, &m, 1))
kref_put(&req->kref, drbd_req_destroy);
- spin_unlock_irq(&device->resource->req_lock);
-
+ spin_unlock_irq(&resource->req_lock);
+
+ /* Even though above is a kref_put(), this is safe.
+ * As long as we still need to submit our private bio,
+ * we hold a completion ref, and the request cannot disappear.
+ * If however this request did not even have a private bio to submit
+ * (e.g. remote read), req may already be invalid now.
+ * That's why we cannot check on req->private_bio. */
+ if (submit_private_bio)
+ drbd_submit_req_private_bio(req);
if (m.bio)
complete_master_bio(device, &m);
}
-void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
- struct drbd_request *req = drbd_request_prepare(device, bio, start_time);
+ struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);
if (IS_ERR_OR_NULL(req))
return;
drbd_send_and_submit(device, req);
@@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
continue;
req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
+ atomic_dec(&device->ap_actlog_cnt);
}
list_del_init(&req->tl_requests);
@@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
struct list_head *incoming,
- struct list_head *pending)
+ struct list_head *pending,
+ struct list_head *later)
{
struct drbd_request *req, *tmp;
int wake = 0;
@@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
spin_lock_irq(&device->al_lock);
list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
err = drbd_al_begin_io_nonblock(device, &req->i);
+ if (err == -ENOBUFS)
+ break;
if (err == -EBUSY)
wake = 1;
if (err)
- continue;
- req->rq_state |= RQ_IN_ACT_LOG;
- list_move_tail(&req->tl_requests, pending);
+ list_move_tail(&req->tl_requests, later);
+ else
+ list_move_tail(&req->tl_requests, pending);
}
spin_unlock_irq(&device->al_lock);
if (wake)
wake_up(&device->al_wait);
-
return !list_empty(pending);
}
+void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+{
+ struct drbd_request *req, *tmp;
+
+ list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+ req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
+ atomic_dec(&device->ap_actlog_cnt);
+ list_del_init(&req->tl_requests);
+ drbd_send_and_submit(device, req);
+ }
+}
+
void do_submit(struct work_struct *ws)
{
struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
- LIST_HEAD(incoming);
- LIST_HEAD(pending);
- struct drbd_request *req, *tmp;
+ LIST_HEAD(incoming); /* from drbd_make_request() */
+ LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */
+ LIST_HEAD(busy); /* blocked by resync requests */
+
+ /* grab new incoming requests */
+ spin_lock_irq(&device->resource->req_lock);
+ list_splice_tail_init(&device->submit.writes, &incoming);
+ spin_unlock_irq(&device->resource->req_lock);
for (;;) {
- spin_lock(&device->submit.lock);
- list_splice_tail_init(&device->submit.writes, &incoming);
- spin_unlock(&device->submit.lock);
+ DEFINE_WAIT(wait);
+ /* move used-to-be-busy back to front of incoming */
+ list_splice_init(&busy, &incoming);
submit_fast_path(device, &incoming);
if (list_empty(&incoming))
break;
-skip_fast_path:
- wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
- /* Maybe more was queued, while we prepared the transaction?
- * Try to stuff them into this transaction as well.
- * Be strictly non-blocking here, no wait_event, we already
- * have something to commit.
- * Stop if we don't make any more progres.
- */
for (;;) {
+ prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ list_splice_init(&busy, &incoming);
+ prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
+ if (!list_empty(&pending))
+ break;
+
+ schedule();
+
+ /* If all currently "hot" activity log extents are kept busy by
+ * incoming requests, we still must not totally starve new
+ * requests to "cold" extents.
+ * Something left on &incoming means there had not been
+ * enough update slots available, and the activity log
+ * has been marked as "starving".
+ *
+ * Try again now, without looking for new requests,
+ * effectively blocking all new requests until we made
+ * at least _some_ progress with what we currently have.
+ */
+ if (!list_empty(&incoming))
+ continue;
+
+ /* Nothing moved to pending, but nothing left
+ * on incoming: all moved to busy!
+ * Grab new and iterate. */
+ spin_lock_irq(&device->resource->req_lock);
+ list_splice_tail_init(&device->submit.writes, &incoming);
+ spin_unlock_irq(&device->resource->req_lock);
+ }
+ finish_wait(&device->al_wait, &wait);
+
+ /* If the transaction was full, before all incoming requests
+ * had been processed, skip ahead to commit, and iterate
+ * without splicing in more incoming requests from upper layers.
+ *
+ * Else, if all incoming have been processed,
+ * they have become either "pending" (to be submitted after
+ * next transaction commit) or "busy" (blocked by resync).
+ *
+ * Maybe more was queued, while we prepared the transaction?
+ * Try to stuff those into this transaction as well.
+ * Be strictly non-blocking here,
+ * we already have something to commit.
+ *
+ * Commit if we don't make any more progres.
+ */
+
+ while (list_empty(&incoming)) {
LIST_HEAD(more_pending);
LIST_HEAD(more_incoming);
bool made_progress;
@@ -1260,55 +1485,32 @@ skip_fast_path:
if (list_empty(&device->submit.writes))
break;
- spin_lock(&device->submit.lock);
+ spin_lock_irq(&device->resource->req_lock);
list_splice_tail_init(&device->submit.writes, &more_incoming);
- spin_unlock(&device->submit.lock);
+ spin_unlock_irq(&device->resource->req_lock);
if (list_empty(&more_incoming))
break;
- made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending);
+ made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
list_splice_tail_init(&more_pending, &pending);
list_splice_tail_init(&more_incoming, &incoming);
-
if (!made_progress)
break;
}
- drbd_al_begin_io_commit(device, false);
-
- list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
- list_del_init(&req->tl_requests);
- drbd_send_and_submit(device, req);
- }
- /* If all currently hot activity log extents are kept busy by
- * incoming requests, we still must not totally starve new
- * requests to cold extents. In that case, prepare one request
- * in blocking mode. */
- list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
- list_del_init(&req->tl_requests);
- req->rq_state |= RQ_IN_ACT_LOG;
- if (!drbd_al_begin_io_prepare(device, &req->i)) {
- /* Corresponding extent was hot after all? */
- drbd_send_and_submit(device, req);
- } else {
- /* Found a request to a cold extent.
- * Put on "pending" list,
- * and try to cumulate with more. */
- list_add(&req->tl_requests, &pending);
- goto skip_fast_path;
- }
- }
+ drbd_al_begin_io_commit(device);
+ send_and_submit_pending(device, &pending);
}
}
void drbd_make_request(struct request_queue *q, struct bio *bio)
{
struct drbd_device *device = (struct drbd_device *) q->queuedata;
- unsigned long start_time;
+ unsigned long start_jif;
- start_time = jiffies;
+ start_jif = jiffies;
/*
* what we "blindly" assume:
@@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
inc_ap_bio(device);
- __drbd_make_request(device, bio, start_time);
+ __drbd_make_request(device, bio, start_jif);
}
/* This is called by bio_add_page().
@@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
return limit;
}
-static void find_oldest_requests(
- struct drbd_connection *connection,
- struct drbd_device *device,
- struct drbd_request **oldest_req_waiting_for_peer,
- struct drbd_request **oldest_req_waiting_for_disk)
-{
- struct drbd_request *r;
- *oldest_req_waiting_for_peer = NULL;
- *oldest_req_waiting_for_disk = NULL;
- list_for_each_entry(r, &connection->transfer_log, tl_requests) {
- const unsigned s = r->rq_state;
- if (!*oldest_req_waiting_for_peer
- && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
- *oldest_req_waiting_for_peer = r;
-
- if (!*oldest_req_waiting_for_disk
- && (s & RQ_LOCAL_PENDING) && r->device == device)
- *oldest_req_waiting_for_disk = r;
-
- if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
- break;
- }
-}
-
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
struct drbd_connection *connection = first_peer_device(device)->connection;
- struct drbd_request *req_disk, *req_peer; /* oldest request */
+ struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
struct net_conf *nc;
+ unsigned long oldest_submit_jif;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
@@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data)
return; /* Recurring timer stopped */
now = jiffies;
+ nt = now + et;
spin_lock_irq(&device->resource->req_lock);
- find_oldest_requests(connection, device, &req_peer, &req_disk);
- if (req_peer == NULL && req_disk == NULL) {
- spin_unlock_irq(&device->resource->req_lock);
- mod_timer(&device->request_timer, now + et);
- return;
- }
+ req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
+ req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
+ req_peer = connection->req_not_net_done;
+ /* maybe the oldest request waiting for the peer is in fact still
+ * blocking in tcp sendmsg */
+ if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
+ req_peer = connection->req_next;
+
+ /* evaluate the oldest peer request only in one timer! */
+ if (req_peer && req_peer->device != device)
+ req_peer = NULL;
+
+ /* do we have something to evaluate? */
+ if (req_peer == NULL && req_write == NULL && req_read == NULL)
+ goto out;
+
+ oldest_submit_jif =
+ (req_write && req_read)
+ ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
+ ? req_write->pre_submit_jif : req_read->pre_submit_jif )
+ : req_write ? req_write->pre_submit_jif
+ : req_read ? req_read->pre_submit_jif : now;
/* The request is considered timed out, if
* - we have some effective timeout from the configuration,
@@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data)
* to expire twice (worst case) to become effective. Good enough.
*/
if (ent && req_peer &&
- time_after(now, req_peer->start_time + ent) &&
+ time_after(now, req_peer->pre_send_jif + ent) &&
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
- if (dt && req_disk &&
- time_after(now, req_disk->start_time + dt) &&
+ if (dt && oldest_submit_jif != now &&
+ time_after(now, oldest_submit_jif + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data)
/* Reschedule timer for the nearest not already expired timeout.
* Fallback to now + min(effective network timeout, disk timeout). */
- ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
- ? req_peer->start_time + ent : now + et;
- dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
- ? req_disk->start_time + dt : now + et;
+ ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
+ ? req_peer->pre_send_jif + ent : now + et;
+ dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
+ ? oldest_submit_jif + dt : now + et;
nt = time_before(ent, dt) ? ent : dt;
+out:
spin_unlock_irq(&connection->resource->req_lock);
mod_timer(&device->request_timer, nt);
}
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 8566cd5866b..9f6a04080e9 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device,
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
+extern void tl_abort_disk_io(struct drbd_device *device);
/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index a5d8aae00e0..c35c0f001bb 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
return rv;
}
-static void print_st(struct drbd_device *device, char *name, union drbd_state ns)
+static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
{
drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
name,
@@ -952,11 +952,12 @@ enum drbd_state_rv
__drbd_set_state(struct drbd_device *device, union drbd_state ns,
enum chg_state_flags flags, struct completion *done)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
union drbd_state os;
enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw;
- bool did_remote, should_do_remote;
os = drbd_read_state(device);
@@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
this happen...*/
if (is_valid_state(device, os) == rv)
- rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+ rv = is_valid_soft_transition(os, ns, connection);
} else
- rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+ rv = is_valid_soft_transition(os, ns, connection);
}
if (rv < SS_SUCCESS) {
@@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
sanitize_state(). Only display it here if we where not called from
_conn_request_state() */
if (!(flags & CS_DC_SUSP))
- conn_pr_state_change(first_peer_device(device)->connection, os, ns,
+ conn_pr_state_change(connection, os, ns,
(flags & ~CS_DC_MASK) | CS_DC_SUSP);
/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
@@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
atomic_inc(&device->local_cnt);
- did_remote = drbd_should_do_remote(device->state);
+ if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
+ clear_bit(RS_DONE, &device->flags);
+
+ /* changes to local_cnt and device flags should be visible before
+ * changes to state, which again should be visible before anything else
+ * depending on that change happens. */
+ smp_wmb();
device->state.i = ns.i;
- should_do_remote = drbd_should_do_remote(device->state);
device->resource->susp = ns.susp;
device->resource->susp_nod = ns.susp_nod;
device->resource->susp_fen = ns.susp_fen;
+ smp_wmb();
/* put replicated vs not-replicated requests in seperate epochs */
- if (did_remote != should_do_remote)
- start_new_tl_epoch(first_peer_device(device)->connection);
+ if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
+ drbd_should_do_remote((union drbd_dev_state)ns.i))
+ start_new_tl_epoch(connection);
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
drbd_print_uuids(device, "attached to UUIDs");
/* Wake up role changes, that were delayed because of connection establishing */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
- no_peer_wf_report_params(first_peer_device(device)->connection))
- clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags);
+ no_peer_wf_report_params(connection))
+ clear_bit(STATE_SENT, &connection->flags);
wake_up(&device->misc_wait);
wake_up(&device->state_wait);
- wake_up(&first_peer_device(device)->connection->ping_wait);
+ wake_up(&connection->ping_wait);
/* Aborted verify run, or we reached the stop sector.
* Log the last position, unless end-of-device. */
@@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
/* Receiver should clean up itself */
if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
- drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_stop_nowait(&connection->receiver);
/* Now the receiver finished cleaning up itself, it should die */
if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
- drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_stop_nowait(&connection->receiver);
/* Upon network failure, we need to restart the receiver. */
if (os.conn > C_WF_CONNECTION &&
ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
- drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_restart_nowait(&connection->receiver);
/* Resume AL writing if we get a connection */
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
drbd_resume_al(device);
- first_peer_device(device)->connection->connect_cnt++;
+ connection->connect_cnt++;
}
/* remember last attach time so request_timer_fn() won't
@@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
ascw->w.cb = w_after_state_ch;
ascw->device = device;
ascw->done = done;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&ascw->w);
} else {
drbd_err(device, "Could not kmalloc an ascw\n");
@@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags)
{
struct drbd_resource *resource = device->resource;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
struct sib_info sib;
sib.sib_reason = SIB_STATE_CHANGE;
sib.os = os;
sib.ns = ns;
- if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+ if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
+ && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
clear_bit(CRASHED_PRIMARY, &device->flags);
if (device->p_uuid)
device->p_uuid[UI_FLAGS] &= ~((u64)2);
@@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
state change. This function might sleep */
if (ns.susp_nod) {
- struct drbd_connection *connection = first_peer_device(device)->connection;
enum drbd_req_event what = NOTHING;
spin_lock_irq(&device->resource->req_lock);
@@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
}
if (ns.susp_fen) {
- struct drbd_connection *connection = first_peer_device(device)->connection;
-
spin_lock_irq(&device->resource->req_lock);
if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
/* case2: The connection was established again: */
@@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
* which is unexpected. */
if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
(ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
- first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) {
- drbd_gen_and_send_sync_uuid(first_peer_device(device));
+ connection->agreed_pro_version >= 96 && get_ldev(device)) {
+ drbd_gen_and_send_sync_uuid(peer_device);
put_ldev(device);
}
@@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
atomic_set(&device->rs_pending_cnt, 0);
drbd_rs_cancel_all(device);
- drbd_send_uuids(first_peer_device(device));
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_uuids(peer_device);
+ drbd_send_state(peer_device, ns);
}
/* No point in queuing send_bitmap if we don't have a connection
* anymore, so check also the _current_ state, not only the new state
@@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
set_bit(NEW_CUR_UUID, &device->flags);
} else {
drbd_uuid_new_current(device);
- drbd_send_uuids(first_peer_device(device));
+ drbd_send_uuids(peer_device);
}
}
put_ldev(device);
@@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(device);
- drbd_send_uuids(first_peer_device(device));
+ drbd_send_uuids(peer_device);
}
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* Last part of the attaching process ... */
if (ns.conn >= C_CONNECTED &&
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
- drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */
- drbd_send_uuids(first_peer_device(device));
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_sizes(peer_device, 0, 0); /* to start sync... */
+ drbd_send_uuids(peer_device);
+ drbd_send_state(peer_device, ns);
}
/* We want to pause/continue resync, tell peer. */
if (ns.conn >= C_CONNECTED &&
((os.aftr_isp != ns.aftr_isp) ||
(os.user_isp != ns.user_isp)))
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* In case one of the isp bits got set, suspend other devices. */
if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* Make sure the peer gets informed about eventual state
changes (ISP bits) while we were in WFReportParams. */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* We are in the progress to start a full sync... */
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
drbd_rs_cancel_all(device);
@@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* corresponding get_ldev in __drbd_set_state
* this may finally trigger drbd_ldev_destroy. */
put_ldev(device);
@@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* Notify peer that I had a local IO error, and did not detached.. */
if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* Disks got bigger while they were detached */
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* sync target done with resync. Explicitly notify peer, even though
* it should (at least for non-empty resyncs) already know itself. */
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* Verify finished, or reached stop sector. Peer did not know about
* the stop sector, and we may even have changed the stop sector during
* verify to interrupt/stop early. Send the new state. */
if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
&& verify_can_do_stop_sector(device))
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* This triggers bitmap writeout of potentially still unwritten pages
* if the resync finished cleanly, or aborted because of peer disk
@@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
old_conf = connection->net_conf;
connection->my_addr_len = 0;
connection->peer_addr_len = 0;
- rcu_assign_pointer(connection->net_conf, NULL);
+ RCU_INIT_POINTER(connection->net_conf, NULL);
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
@@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
return 0;
}
-void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
+static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
{
enum chg_state_flags flags = ~0;
struct drbd_peer_device *peer_device;
@@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
return rv;
}
-void
+static void
conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
{
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d8f57b6305c..50776b36282 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,13 +67,10 @@ rwlock_t global_state_lock;
*/
void drbd_md_io_complete(struct bio *bio, int error)
{
- struct drbd_md_io *md_io;
struct drbd_device *device;
- md_io = (struct drbd_md_io *)bio->bi_private;
- device = container_of(md_io, struct drbd_device, md_io);
-
- md_io->error = error;
+ device = bio->bi_private;
+ device->md_io.error = error;
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
@@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error)
* ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
*/
drbd_md_put_buffer(device);
- md_io->done = 1;
+ device->md_io.done = 1;
wake_up(&device->misc_wait);
bio_put(bio);
if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
@@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
i = peer_req->i;
do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
block_id = peer_req->block_id;
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
spin_lock_irqsave(&device->resource->req_lock, flags);
device->writ_cnt += peer_req->i.size >> 9;
@@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
if (!get_ldev(device))
return -EIO;
- if (drbd_rs_should_slow_down(device, sector))
- goto defer;
-
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
peer_req->w.cb = w_e_send_csum;
spin_lock_irq(&device->resource->req_lock);
- list_add(&peer_req->w.list, &device->read_ee);
+ list_add_tail(&peer_req->w.list, &device->read_ee);
spin_unlock_irq(&device->resource->req_lock);
atomic_add(size >> 9, &device->rs_sect_ev);
@@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
- if (list_empty(&device->resync_work.list))
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->resync_work);
+ drbd_queue_work_if_unqueued(
+ &first_peer_device(device)->connection->sender_work,
+ &device->resync_work);
}
static void fifo_set(struct fifo_buffer *fb, int value)
@@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
struct disk_conf *dc;
- unsigned int want; /* The number of sectors we want in the proxy */
+ unsigned int want; /* The number of sectors we want in-flight */
int req_sect; /* Number of sectors to request in this turn */
- int correction; /* Number of sectors more we need in the proxy*/
+ int correction; /* Number of sectors more we need in-flight */
int cps; /* correction per invocation of drbd_rs_controller() */
int steps; /* Number of time steps to plan ahead */
int curr_corr;
@@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device)
* potentially causing a distributed deadlock on congestion during
* online-verify or (checksum-based) resync, if max-buffers,
* socket buffer sizes and resync rate settings are mis-configured. */
- if (mxb - device->rs_in_flight < number)
- number = mxb - device->rs_in_flight;
+
+ /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
+ * mxb (as used here, and in drbd_alloc_pages on the peer) is
+ * "number of pages" (typically also 4k),
+ * but "rs_in_flight" is in "sectors" (512 Byte). */
+ if (mxb - device->rs_in_flight/8 < number)
+ number = mxb - device->rs_in_flight/8;
return number;
}
-static int make_resync_request(struct drbd_device *device, int cancel)
+static int make_resync_request(struct drbd_device *const device, int cancel)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
unsigned long bit;
sector_t sector;
const sector_t capacity = drbd_get_capacity(device->this_bdev);
int max_bio_size;
int number, rollback_i, size;
- int align, queued, sndbuf;
+ int align, requeue = 0;
int i = 0;
if (unlikely(cancel))
@@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel)
goto requeue;
for (i = 0; i < number; i++) {
- /* Stop generating RS requests, when half of the send buffer is filled */
- mutex_lock(&first_peer_device(device)->connection->data.mutex);
- if (first_peer_device(device)->connection->data.socket) {
- queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
- sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
- } else {
- queued = 1;
- sndbuf = 0;
- }
- mutex_unlock(&first_peer_device(device)->connection->data.mutex);
- if (queued > sndbuf / 2)
+ /* Stop generating RS requests when half of the send buffer is filled,
+ * but notify TCP that we'd like to have more space. */
+ mutex_lock(&connection->data.mutex);
+ if (connection->data.socket) {
+ struct sock *sk = connection->data.socket->sk;
+ int queued = sk->sk_wmem_queued;
+ int sndbuf = sk->sk_sndbuf;
+ if (queued > sndbuf / 2) {
+ requeue = 1;
+ if (sk->sk_socket)
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ } else
+ requeue = 1;
+ mutex_unlock(&connection->data.mutex);
+ if (requeue)
goto requeue;
next_sector:
@@ -642,8 +649,7 @@ next_sector:
sector = BM_BIT_TO_SECT(bit);
- if (drbd_rs_should_slow_down(device, sector) ||
- drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(device, sector)) {
device->bm_resync_fo = bit;
goto requeue;
}
@@ -696,9 +702,9 @@ next_sector:
/* adjust very last sectors, in case we are oddly sized */
if (sector + (size>>9) > capacity)
size = (capacity-sector)<<9;
- if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
- first_peer_device(device)->connection->csums_tfm) {
- switch (read_for_csum(first_peer_device(device), sector, size)) {
+
+ if (device->use_csums) {
+ switch (read_for_csum(peer_device, sector, size)) {
case -EIO: /* Disk failure */
put_ldev(device);
return -EIO;
@@ -717,7 +723,7 @@ next_sector:
int err;
inc_rs_pending(device);
- err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
+ err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
sector, size, ID_SYNCER);
if (err) {
drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
@@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
size = BM_BLOCK_SIZE;
- if (drbd_rs_should_slow_down(device, sector) ||
- drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(device, sector)) {
device->ov_position = sector;
goto requeue;
}
@@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device)
if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
khelper_cmd = "after-resync-target";
- if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
+ if (device->use_csums && device->rs_total) {
const unsigned long s = device->rs_same_csum;
const unsigned long t = device->rs_total;
const int ratio =
@@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
/* this time, no connection->send.current_epoch_writes++;
* If it was sent, it was the closing barrier for the last
@@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
* No more barriers will be sent, until we leave AHEAD mode again. */
maybe_send_barrier(connection, req->epoch);
- err = drbd_send_out_of_sync(first_peer_device(device), req);
+ err = drbd_send_out_of_sync(peer_device, req);
req_mod(req, OOS_HANDED_TO_NETWORK);
return err;
@@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
re_init_if_first_write(connection, req->epoch);
maybe_send_barrier(connection, req->epoch);
connection->send.current_epoch_writes++;
- err = drbd_send_dblock(first_peer_device(device), req);
+ err = drbd_send_dblock(peer_device, req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
return err;
@@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
/* Even read requests may close a write epoch,
* if there was any yet. */
maybe_send_barrier(connection, req->epoch);
- err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
+ err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
(unsigned long)req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
@@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
- drbd_al_begin_io(device, &req->i, false);
+ drbd_al_begin_io(device, &req->i);
drbd_req_make_private_bio(req, req->master_bio);
req->private_bio->bi_bdev = device->ldev->backing_bdev;
@@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device)
void start_resync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
-
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->start_resync_work);
+ drbd_device_post_work(device, RS_START);
}
-int w_start_resync(struct drbd_work *w, int cancel)
+static void do_start_resync(struct drbd_device *device)
{
- struct drbd_device *device =
- container_of(w, struct drbd_device, start_resync_work);
-
if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
- drbd_warn(device, "w_start_resync later...\n");
+ drbd_warn(device, "postponing start_resync ...\n");
device->start_resync_timer.expires = jiffies + HZ/10;
add_timer(&device->start_resync_timer);
- return 0;
+ return;
}
drbd_start_resync(device, C_SYNC_SOURCE);
clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
- return 0;
+}
+
+static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
+{
+ bool csums_after_crash_only;
+ rcu_read_lock();
+ csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
+ rcu_read_unlock();
+ return connection->agreed_pro_version >= 89 && /* supported? */
+ connection->csums_tfm && /* configured? */
+ (csums_after_crash_only == 0 /* use for each resync? */
+ || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
}
/**
@@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel)
*/
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
union drbd_state ns;
int r;
@@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
if (r > 0) {
drbd_info(device, "before-resync-target handler returned %d, "
"dropping connection.\n", r);
- conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
+ conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
return;
}
} else /* C_SYNC_SOURCE */ {
@@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
} else {
drbd_info(device, "before-resync-source handler returned %d, "
"dropping connection.\n", r);
- conn_request_state(first_peer_device(device)->connection,
+ conn_request_state(connection,
NS(conn, C_DISCONNECTING), CS_HARD);
return;
}
@@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
}
- if (current == first_peer_device(device)->connection->worker.task) {
+ if (current == connection->worker.task) {
/* The worker should not sleep waiting for state_mutex,
that can take long */
if (!mutex_trylock(device->state_mutex)) {
@@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->rs_mark_time[i] = now;
}
_drbd_pause_after(device);
+ /* Forget potentially stale cached per resync extent bit-counts.
+ * Open coded drbd_rs_cancel_all(device), we already have IRQs
+ * disabled, and know the disk state is ok. */
+ spin_lock(&device->al_lock);
+ lc_reset(device->resync);
+ device->resync_locked = 0;
+ device->resync_wenr = LC_FREE;
+ spin_unlock(&device->al_lock);
}
write_unlock(&global_state_lock);
spin_unlock_irq(&device->resource->req_lock);
if (r == SS_SUCCESS) {
+ wake_up(&device->al_wait); /* for lc_reset() above */
/* reset rs_last_bcast when a resync or verify is started,
* to deal with potential jiffies wrap. */
device->rs_last_bcast = jiffies - HZ;
@@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
drbd_conn_str(ns.conn),
(unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
(unsigned long) device->rs_total);
- if (side == C_SYNC_TARGET)
+ if (side == C_SYNC_TARGET) {
device->bm_resync_fo = 0;
+ device->use_csums = use_checksum_based_resync(connection, device);
+ } else {
+ device->use_csums = 0;
+ }
/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
* with w_send_oos, or the sync target will get confused as to
@@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
* drbd_resync_finished from here in that case.
* We drbd_gen_and_send_sync_uuid here for protocol < 96,
* and from after_state_ch otherwise. */
- if (side == C_SYNC_SOURCE &&
- first_peer_device(device)->connection->agreed_pro_version < 96)
- drbd_gen_and_send_sync_uuid(first_peer_device(device));
+ if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
+ drbd_gen_and_send_sync_uuid(peer_device);
- if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
- device->rs_total == 0) {
+ if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
/* This still has a race (about when exactly the peers
* detect connection loss) that can lead to a full sync
* on next handshake. In 8.3.9 we fixed this with explicit
@@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
int timeo;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
@@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
mutex_unlock(device->state_mutex);
}
+static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
+{
+ struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
+ device->rs_last_bcast = jiffies;
+
+ if (!get_ldev(device))
+ return;
+
+ drbd_bm_write_lazy(device, 0);
+ if (resync_done && is_sync_state(device->state.conn))
+ drbd_resync_finished(device);
+
+ drbd_bcast_event(device, &sib);
+ /* update timestamp, in case it took a while to write out stuff */
+ device->rs_last_bcast = jiffies;
+ put_ldev(device);
+}
+
+static void drbd_ldev_destroy(struct drbd_device *device)
+{
+ lc_destroy(device->resync);
+ device->resync = NULL;
+ lc_destroy(device->act_log);
+ device->act_log = NULL;
+ __no_warn(local,
+ drbd_free_ldev(device->ldev);
+ device->ldev = NULL;);
+ clear_bit(GOING_DISKLESS, &device->flags);
+ wake_up(&device->misc_wait);
+}
+
+static void go_diskless(struct drbd_device *device)
+{
+ D_ASSERT(device, device->state.disk == D_FAILED);
+ /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
+ * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
+ * the protected members anymore, though, so once put_ldev reaches zero
+ * again, it will be safe to free them. */
+
+ /* Try to write changed bitmap pages, read errors may have just
+ * set some bits outside the area covered by the activity log.
+ *
+ * If we have an IO error during the bitmap writeout,
+ * we will want a full sync next time, just in case.
+ * (Do we want a specific meta data flag for this?)
+ *
+ * If that does not make it to stable storage either,
+ * we cannot do anything about that anymore.
+ *
+ * We still need to check if both bitmap and ldev are present, we may
+ * end up here after a failed attach, before ldev was even assigned.
+ */
+ if (device->bitmap && device->ldev) {
+ /* An interrupted resync or similar is allowed to recounts bits
+ * while we detach.
+ * Any modifications would not be expected anymore, though.
+ */
+ if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
+ "detach", BM_LOCKED_TEST_ALLOWED)) {
+ if (test_bit(WAS_READ_ERROR, &device->flags)) {
+ drbd_md_set_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
+ }
+ }
+ }
+
+ drbd_force_state(device, NS(disk, D_DISKLESS));
+}
+
+static int do_md_sync(struct drbd_device *device)
+{
+ drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+ drbd_md_sync(device);
+ return 0;
+}
+
+/* only called from drbd_worker thread, no locking */
+void __update_timing_details(
+ struct drbd_thread_timing_details *tdp,
+ unsigned int *cb_nr,
+ void *cb,
+ const char *fn, const unsigned int line)
+{
+ unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
+ struct drbd_thread_timing_details *td = tdp + i;
+
+ td->start_jif = jiffies;
+ td->cb_addr = cb;
+ td->caller_fn = fn;
+ td->line = line;
+ td->cb_nr = *cb_nr;
+
+ i = (i+1) % DRBD_THREAD_DETAILS_HIST;
+ td = tdp + i;
+ memset(td, 0, sizeof(*td));
+
+ ++(*cb_nr);
+}
+
+#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit))
+static void do_device_work(struct drbd_device *device, const unsigned long todo)
+{
+ if (WORK_PENDING(MD_SYNC, todo))
+ do_md_sync(device);
+ if (WORK_PENDING(RS_DONE, todo) ||
+ WORK_PENDING(RS_PROGRESS, todo))
+ update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
+ if (WORK_PENDING(GO_DISKLESS, todo))
+ go_diskless(device);
+ if (WORK_PENDING(DESTROY_DISK, todo))
+ drbd_ldev_destroy(device);
+ if (WORK_PENDING(RS_START, todo))
+ do_start_resync(device);
+}
+
+#define DRBD_DEVICE_WORK_MASK \
+ ((1UL << GO_DISKLESS) \
+ |(1UL << DESTROY_DISK) \
+ |(1UL << MD_SYNC) \
+ |(1UL << RS_START) \
+ |(1UL << RS_PROGRESS) \
+ |(1UL << RS_DONE) \
+ )
+
+static unsigned long get_work_bits(unsigned long *flags)
+{
+ unsigned long old, new;
+ do {
+ old = *flags;
+ new = old & ~DRBD_DEVICE_WORK_MASK;
+ } while (cmpxchg(flags, old, new) != old);
+ return old & DRBD_DEVICE_WORK_MASK;
+}
+
+static void do_unqueued_work(struct drbd_connection *connection)
+{
+ struct drbd_peer_device *peer_device;
+ int vnr;
+
+ rcu_read_lock();
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+ struct drbd_device *device = peer_device->device;
+ unsigned long todo = get_work_bits(&device->flags);
+ if (!todo)
+ continue;
+
+ kref_get(&device->kref);
+ rcu_read_unlock();
+ do_device_work(device, todo);
+ kref_put(&device->kref, drbd_destroy_device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
spin_lock_irq(&queue->q_lock);
- list_splice_init(&queue->q, work_list);
+ list_splice_tail_init(&queue->q, work_list);
spin_unlock_irq(&queue->q_lock);
return !list_empty(work_list);
}
@@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
/* dequeue single item only,
* we still use drbd_queue_work_front() in some places */
if (!list_empty(&connection->sender_work.q))
- list_move(connection->sender_work.q.next, work_list);
+ list_splice_tail_init(&connection->sender_work.q, work_list);
spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
if (!list_empty(work_list) || signal_pending(current)) {
spin_unlock_irq(&connection->resource->req_lock);
@@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
if (send_barrier)
maybe_send_barrier(connection,
connection->send.current_epoch_nr + 1);
+
+ if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
+ break;
+
+ /* drbd_send() may have called flush_signals() */
+ if (get_t_state(&connection->worker) != RUNNING)
+ break;
+
schedule();
/* may be woken up for other things but new work, too,
* e.g. if the current epoch got closed.
@@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi)
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- /* as long as we use drbd_queue_work_front(),
- * we may only dequeue single work items here, not batches. */
- if (list_empty(&work_list))
+ if (list_empty(&work_list)) {
+ update_worker_timing_details(connection, wait_for_work);
wait_for_work(connection, &work_list);
+ }
+
+ if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+ update_worker_timing_details(connection, do_unqueued_work);
+ do_unqueued_work(connection);
+ }
if (signal_pending(current)) {
flush_signals(current);
@@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi)
while (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
+ update_worker_timing_details(connection, w->cb);
if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
continue;
if (connection->cstate >= C_WF_REPORT_PARAMS)
@@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi)
}
do {
+ if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+ update_worker_timing_details(connection, do_unqueued_work);
+ do_unqueued_work(connection);
+ }
while (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
+ update_worker_timing_details(connection, w->cb);
w->cb(w, 1);
}
dequeue_work_batch(&connection->sender_work, &work_list);
- } while (!list_empty(&work_list));
+ } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f63d358f3d9..0a581400de0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -15,17 +15,22 @@
#include <linux/numa.h>
#define PART_BITS 4
+#define VQ_NAME_LEN 16
static int major;
static DEFINE_IDA(vd_index_ida);
static struct workqueue_struct *virtblk_wq;
+struct virtio_blk_vq {
+ struct virtqueue *vq;
+ spinlock_t lock;
+ char name[VQ_NAME_LEN];
+} ____cacheline_aligned_in_smp;
+
struct virtio_blk
{
struct virtio_device *vdev;
- struct virtqueue *vq;
- spinlock_t vq_lock;
/* The disk structure for the kernel. */
struct gendisk *disk;
@@ -47,6 +52,10 @@ struct virtio_blk
/* Ida index - used to track minor number allocations. */
int index;
+
+ /* num of vqs */
+ int num_vqs;
+ struct virtio_blk_vq *vqs;
};
struct virtblk_req
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
bool req_done = false;
+ int qid = vq->index;
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
- spin_lock_irqsave(&vblk->vq_lock, flags);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
do {
virtqueue_disable_cb(vq);
- while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+ while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
blk_mq_complete_request(vbr->req);
req_done = true;
}
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq)
/* In case queue is stopped waiting for more buffers. */
if (req_done)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
unsigned long flags;
unsigned int num;
+ int qid = hctx->queue_num;
const bool last = (req->cmd_flags & REQ_END) != 0;
int err;
bool notify = false;
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
}
- spin_lock_irqsave(&vblk->vq_lock, flags);
- err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
+ err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
if (err) {
- virtqueue_kick(vblk->vq);
+ virtqueue_kick(vblk->vqs[qid].vq);
blk_mq_stop_hw_queue(hctx);
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
/* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC)
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
return BLK_MQ_RQ_QUEUE_ERROR;
}
- if (last && virtqueue_kick_prepare(vblk->vq))
+ if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
notify = true;
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
if (notify)
- virtqueue_notify(vblk->vq);
+ virtqueue_notify(vblk->vqs[qid].vq);
return BLK_MQ_RQ_QUEUE_OK;
}
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev)
static int init_vq(struct virtio_blk *vblk)
{
int err = 0;
+ int i;
+ vq_callback_t **callbacks;
+ const char **names;
+ struct virtqueue **vqs;
+ unsigned short num_vqs;
+ struct virtio_device *vdev = vblk->vdev;
+
+ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
+ struct virtio_blk_config, num_queues,
+ &num_vqs);
+ if (err)
+ num_vqs = 1;
+
+ vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+ if (!vblk->vqs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+ if (!names)
+ goto err_names;
+
+ callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+ if (!callbacks)
+ goto err_callbacks;
+
+ vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+ if (!vqs)
+ goto err_vqs;
- /* We expect one virtqueue, for output. */
- vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
- if (IS_ERR(vblk->vq))
- err = PTR_ERR(vblk->vq);
+ for (i = 0; i < num_vqs; i++) {
+ callbacks[i] = virtblk_done;
+ snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+ names[i] = vblk->vqs[i].name;
+ }
+
+ /* Discover virtqueues and write information to configuration. */
+ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+ if (err)
+ goto err_find_vqs;
+ for (i = 0; i < num_vqs; i++) {
+ spin_lock_init(&vblk->vqs[i].lock);
+ vblk->vqs[i].vq = vqs[i];
+ }
+ vblk->num_vqs = num_vqs;
+
+ err_find_vqs:
+ kfree(vqs);
+ err_vqs:
+ kfree(callbacks);
+ err_callbacks:
+ kfree(names);
+ err_names:
+ if (err)
+ kfree(vblk->vqs);
+ out:
return err;
}
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk);
if (err)
goto out_free_vblk;
- spin_lock_init(&vblk->vq_lock);
/* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS);
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Default queue sizing is to fill the ring. */
if (!virtblk_queue_depth) {
- virtblk_queue_depth = vblk->vq->num_free;
+ virtblk_queue_depth = vblk->vqs[0].vq->num_free;
/* ... but without indirect descs, we use 2 descs per req */
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
virtblk_queue_depth /= 2;
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev)
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
vblk->tag_set.ops = &virtio_mq_ops;
- vblk->tag_set.nr_hw_queues = 1;
vblk->tag_set.queue_depth = virtblk_queue_depth;
vblk->tag_set.numa_node = NUMA_NO_NODE;
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev)
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
vblk->tag_set.driver_data = vblk;
+ vblk->tag_set.nr_hw_queues = vblk->num_vqs;
err = blk_mq_alloc_tag_set(&vblk->tag_set);
if (err)
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev)
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk);
vdev->config->del_vqs(vdev);
+ kfree(vblk->vqs);
kfree(vblk);
/* Only free device id if we don't have any users */
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
- VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
+ VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
+ VIRTIO_BLK_F_MQ,
};
static struct virtio_driver virtio_blk = {
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 8bc422977b5..4ff86878727 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -499,8 +499,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
}
/* Lookup the i2c hwclock */
- for (hwclock = NULL;
- (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
+ for_each_node_by_name(hwclock, "i2c-hwclock") {
const char *loc = of_get_property(hwclock,
"hwctrl-location", NULL);
if (loc == NULL)
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 544f6d327ed..061407d5952 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop)
goto error_out;
}
- /* Set ptr to new property if provided */
- if (new_prop) {
- /* Single property */
- if (!strncmp(new_prop->name, "status", new_prop->length)) {
- status = new_prop;
-
- } else if (!strncmp(new_prop->name, "ibm,max-sg-len",
- new_prop->length)) {
- maxsglen = new_prop;
-
- } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
- new_prop->length)) {
- maxsyncop = new_prop;
-
- } else {
- /*
- * Skip the update, the property being updated
- * has no impact.
- */
- goto out;
- }
- }
+ /*
+ * If this is a property update, there are only certain properties that
+ * we care about. Bail if it isn't in the below list
+ */
+ if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
+ strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
+ strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+ goto out;
/* Perform property updates */
ret = nx842_OF_upd_status(new_devdata, status);
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 374b57fc596..a12c8552f6a 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -134,8 +134,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci)
int j;
u32 nr_pages;
- for (np = NULL;
- (np = of_find_node_by_name(np, "memory")) != NULL;) {
+ for_each_node_by_name(np, "memory") {
struct resource r;
/* We "know" that the Cell firmware only creates one entry
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index d3d0e8cf27b..d6c767ace91 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -382,6 +382,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index ca8430f9256..e67b9a50ac7 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -1085,6 +1085,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 22e0c926989..126516414c1 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
dev_err(&client->dev,
"invalid gain on %s\n",
node->full_name);
+ return -EINVAL;
}
}
@@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
dev_err(&client->dev,
"invalid data_rate on %s\n",
node->full_name);
+ return -EINVAL;
}
}
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index f96063680e5..272fcc837ec 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -510,6 +510,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4ae3fff13f4..bea0a344fab 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -247,8 +247,8 @@ struct dme1737_data {
u8 pwm_acz[3];
u8 pwm_freq[6];
u8 pwm_rr[2];
- u8 zone_low[3];
- u8 zone_abs[3];
+ s8 zone_low[3];
+ s8 zone_abs[3];
u8 zone_hyst[2];
u32 alarms;
};
@@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res)
return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2));
}
-static inline int IN_TO_REG(int val, int nominal)
+static inline int IN_TO_REG(long val, int nominal)
{
return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255);
}
@@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res)
return (reg * 1000) >> (res - 8);
}
-static inline int TEMP_TO_REG(int val)
+static inline int TEMP_TO_REG(long val)
{
return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127);
}
@@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg)
return TEMP_RANGE[(reg >> 4) & 0x0f];
}
-static int TEMP_RANGE_TO_REG(int val, int reg)
+static int TEMP_RANGE_TO_REG(long val, int reg)
{
int i;
@@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix)
return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000;
}
-static inline int TEMP_HYST_TO_REG(int val, int ix, int reg)
+static inline int TEMP_HYST_TO_REG(long val, int ix, int reg)
{
int hyst = clamp_val((val + 500) / 1000, 0, 15);
@@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc)
return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg;
}
-static inline int FAN_TO_REG(int val, int tpc)
+static inline int FAN_TO_REG(long val, int tpc)
{
if (tpc) {
return clamp_val(val / tpc, 0, 0xffff);
@@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg)
return (edge > 0) ? 1 << (edge - 1) : 0;
}
-static inline int FAN_TYPE_TO_REG(int val, int reg)
+static inline int FAN_TYPE_TO_REG(long val, int reg)
{
int edge = (val == 4) ? 3 : val;
@@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg)
return 1000 + i * 500;
}
-static int FAN_MAX_TO_REG(int val)
+static int FAN_MAX_TO_REG(long val)
{
int i;
@@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg)
return acz[(reg >> 5) & 0x07];
}
-static inline int PWM_ACZ_TO_REG(int val, int reg)
+static inline int PWM_ACZ_TO_REG(long val, int reg)
{
int acz = (val == 4) ? 2 : val - 1;
@@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg)
return PWM_FREQ[reg & 0x0f];
}
-static int PWM_FREQ_TO_REG(int val, int reg)
+static int PWM_FREQ_TO_REG(long val, int reg)
{
int i;
@@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix)
return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0;
}
-static int PWM_RR_TO_REG(int val, int ix, int reg)
+static int PWM_RR_TO_REG(long val, int ix, int reg)
{
int i;
@@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix)
return PWM_RR_FROM_REG(reg, ix) ? 1 : 0;
}
-static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg)
+static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg)
{
int en = (ix == 1) ? 0x80 : 0x08;
@@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct dme1737_data *data = dev_get_drvdata(dev);
- long val;
+ unsigned long val;
int err;
- err = kstrtol(buf, 10, &val);
+ err = kstrtoul(buf, 10, &val);
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c
index e87da902f3a..ada90716448 100644
--- a/drivers/hwmon/emc6w201.c
+++ b/drivers/hwmon/emc6w201.c
@@ -252,12 +252,12 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *devattr,
if (err < 0)
return err;
- val /= 1000;
+ val = DIV_ROUND_CLOSEST(val, 1000);
reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr)
: EMC6W201_REG_TEMP_HIGH(nr);
mutex_lock(&data->update_lock);
- data->temp[sf][nr] = clamp_val(val, -127, 128);
+ data->temp[sf][nr] = clamp_val(val, -127, 127);
err = emc6w201_write8(client, reg, data->temp[sf][nr]);
mutex_unlock(&data->update_lock);
diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c
index 0e01c4e13e3..7b73d2002d3 100644
--- a/drivers/hwmon/hih6130.c
+++ b/drivers/hwmon/hih6130.c
@@ -238,6 +238,9 @@ static int hih6130_probe(struct i2c_client *client,
hih6130->client = client;
mutex_init(&hih6130->lock);
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_QUICK))
+ hih6130->write_length = 1;
+
hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
hih6130,
hih6130_groups);
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index ba1d83d4805..a5e295826ae 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -617,6 +617,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index d2060e245ff..cfaf70b9cba 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -74,12 +74,9 @@ static inline int TEMP_FROM_REG(s16 reg)
return reg / 8 * 625 / 10;
}
-static inline s16 TEMP_TO_REG(int val)
+static inline s16 TEMP_TO_REG(long val)
{
- if (val <= -60000)
- return -60000 * 10 / 625 * 8;
- if (val >= 160000)
- return 160000 * 10 / 625 * 8;
+ val = clamp_val(val, -60000, 160000);
return val * 10 / 625 * 8;
}
@@ -206,10 +203,12 @@ static ssize_t set_temp_hyst(struct device *dev,
if (err)
return err;
+ val = clamp_val(val, -120000, 220000);
mutex_lock(&data->update_lock);
- data->temp[t_hyst] = TEMP_FROM_REG(data->temp[attr->index]) - val;
+ data->temp[t_hyst] =
+ TEMP_TO_REG(TEMP_FROM_REG(data->temp[attr->index]) - val);
i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST,
- TEMP_TO_REG(data->temp[t_hyst]));
+ data->temp[t_hyst]);
mutex_unlock(&data->update_lock);
return count;
}
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 988181e4cfc..145f674c1d8 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -615,6 +615,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c
index c74d2da389d..e42964f07f6 100644
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -131,13 +131,6 @@ static int tmp103_probe(struct i2c_client *client,
struct regmap *regmap;
int ret;
- if (!i2c_check_functionality(client->adapter,
- I2C_FUNC_SMBUS_BYTE_DATA)) {
- dev_err(&client->dev,
- "adapter doesn't support SMBus byte transactions\n");
- return -ENODEV;
- }
-
regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config);
if (IS_ERR(regmap)) {
dev_err(dev, "failed to allocate register map\n");
diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c
index 344b22ec255..3ea57c3504e 100644
--- a/drivers/hwmon/vt1211.c
+++ b/drivers/hwmon/vt1211.c
@@ -879,6 +879,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index c1726be3654..2f55973a8c4 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -820,6 +820,9 @@ store_vrm_reg(struct device *dev, struct device_attribute *attr, const char *buf
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
data->vrm = val;
return count;
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index cb3765fec98..001df856913 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -1181,6 +1181,9 @@ static ssize_t store_vrm_reg(struct device *dev,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 9d63d71214c..816aa6caf5d 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -353,6 +353,9 @@ store_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index 70637d23b1f..3612cb5b30b 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig
@@ -10,7 +10,7 @@ menu "Hardware Spinlock drivers"
config HWSPINLOCK_OMAP
tristate "OMAP Hardware Spinlock device"
- depends on ARCH_OMAP4 || SOC_OMAP5
+ depends on ARCH_OMAP4 || SOC_OMAP5 || SOC_DRA7XX || SOC_AM33XX || SOC_AM43XX
select HWSPINLOCK
help
Say y here to support the OMAP Hardware Spinlock device (firstly
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index 292869cc903..c1e2cd4d85f 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -98,10 +98,29 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
if (!io_base)
return -ENOMEM;
+ /*
+ * make sure the module is enabled and clocked before reading
+ * the module SYSSTATUS register
+ */
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(&pdev->dev);
+ goto iounmap_base;
+ }
+
/* Determine number of locks */
i = readl(io_base + SYSSTATUS_OFFSET);
i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET;
+ /*
+ * runtime PM will make sure the clock of this module is
+ * enabled again iff at least one lock is requested
+ */
+ ret = pm_runtime_put(&pdev->dev);
+ if (ret < 0)
+ goto iounmap_base;
+
/* one of the four lsb's must be set, and nothing else */
if (hweight_long(i & 0xf) != 1 || i > 8) {
ret = -EINVAL;
@@ -121,12 +140,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
- /*
- * runtime PM will make sure the clock of this module is
- * enabled iff at least one lock is requested
- */
- pm_runtime_enable(&pdev->dev);
-
ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
pdata->base_id, num_locks);
if (ret)
@@ -135,9 +148,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
return 0;
reg_fail:
- pm_runtime_disable(&pdev->dev);
kfree(bank);
iounmap_base:
+ pm_runtime_disable(&pdev->dev);
iounmap(io_base);
return ret;
}
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5af64f..f6d29614cb0 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
- printk(KERN_ERR SPFX "Unable to find port agent\n");
+ dev_err(&device->dev, "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
PTR_ERR(ah));
return;
}
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
- printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+ dev_err(&device->dev, "ib_create_send_mad error\n");
goto err1;
}
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
}
if (ib_post_send_mad(send_buf, NULL)) {
- printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+ dev_err(&device->dev, "ib_post_send_mad error\n");
goto err2;
}
return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[0] = ib_register_mad_agent(device, port_num,
IB_QPT_SMI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[0])) {
ret = PTR_ERR(port_priv->agent[0]);
goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[1] = ib_register_mad_agent(device, port_num,
IB_QPT_GSI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[1])) {
ret = PTR_ERR(port_priv->agent[1]);
goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b..e28a494e2a3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
struct cm_port *port;
struct ib_mad_reg_req reg_req = {
.mgmt_class = IB_MGMT_CLASS_CM,
- .mgmt_class_version = IB_CM_CLASS_VERSION
+ .mgmt_class_version = IB_CM_CLASS_VERSION,
};
struct ib_port_modify port_modify = {
.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
0,
cm_send_handler,
cm_recv_handler,
- port);
+ port,
+ 0);
if (IS_ERR(port->mad_agent))
goto error2;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489ab73..ff9163dc159 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/sysctl.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
struct list_head free_list;
};
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+ {
+ .procname = "default_backlog",
+ .data = &default_backlog,
+ .maxlen = sizeof(default_backlog),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
/*
* The following services provide a mechanism for pre-allocating iwcm_work
* elements. The design pre-allocates them based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ if (!backlog)
+ backlog = default_backlog;
+
ret = alloc_work_entries(cm_id_priv, backlog);
if (ret)
return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
if (!iwcm_wq)
return -ENOMEM;
+ iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+ iwcm_ctl_table);
+ if (!iwcm_ctl_table_hdr) {
+ pr_err("iw_cm: couldn't register sysctl paths\n");
+ destroy_workqueue(iwcm_wq);
+ return -ENOMEM;
+ }
+
return 0;
}
static void __exit iw_cm_cleanup(void)
{
+ unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f136d04..74c30f4c557 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
* SOFTWARE.
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
- void *context)
+ void *context,
+ u32 registration_flags)
{
struct ib_mad_port_private *port_priv;
struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
- if (qpn == -1)
+ if (qpn == -1) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid QP Type %d\n",
+ qp_type);
goto error1;
+ }
- if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+ if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid RMPP Version %u\n",
+ rmpp_version);
goto error1;
+ }
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
- if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+ if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid Class Version %u\n",
+ mad_reg_req->mgmt_class_version);
goto error1;
- if (!recv_handler)
+ }
+ if (!recv_handler) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: no recv_handler\n");
goto error1;
+ }
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
/*
* IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
* one in this range currently allowed
*/
if (mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else if (mad_reg_req->mgmt_class == 0) {
/*
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0\n");
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
* If class is in "new" vendor range,
* ensure supplied OUI is not zero
*/
- if (!is_vendor_oui(mad_reg_req->oui))
+ if (!is_vendor_oui(mad_reg_req->oui)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: No OUI specified for class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
- if (rmpp_version)
+ if (rmpp_version) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
+
/* Make sure class supplied is consistent with QP type */
if (qp_type == IB_QPT_SMI) {
if ((mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else {
if ((mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
} else {
/* No registration request supplied */
if (!send_handler)
goto error1;
+ if (registration_flags & IB_MAD_USER_RMPP)
+ goto error1;
}
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
+ dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0 */
if (!port_priv->qp_info[qpn].qp) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: QP %d not supported\n", qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
mad_agent_priv->agent.context = context;
mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
mad_agent_priv->agent.port_num = port_num;
+ mad_agent_priv->agent.flags = registration_flags;
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
IB_SMI_DISCARD) {
ret = -EINVAL;
- printk(KERN_ERR PFX "Invalid directed route\n");
+ dev_err(&device->dev, "Invalid directed route\n");
goto out;
}
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for local response MAD\n");
+ dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
- "alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
+ dev_err(&send_buf->mad_agent->device->dev,
+ "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
return 0;
}
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+ return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
pad = get_pad_size(hdr_len, data_len);
message_size = hdr_len + data_len + pad;
- if ((!mad_agent->rmpp_version &&
- (rmpp_active || message_size > sizeof(struct ib_mad))) ||
- (!rmpp_active && message_size > sizeof(struct ib_mad)))
- return ERR_PTR(-EINVAL);
+ if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+ if (!rmpp_active && message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
+ } else
+ if (rmpp_active || message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
@@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
&mad_agent_priv->send_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_send_rmpp_mad(mad_send_wr);
if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
ret = ib_send_mad(mad_send_wr);
@@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp);
int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
struct ib_wc *wc)
{
- printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ dev_err(&mad_agent->device->dev,
+ "ib_process_mad_wc() not implemented yet\n");
return 0;
}
EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
- printk(KERN_ERR PFX "Method %d already in use\n", i);
+ pr_err("Method %d already in use\n", i);
return -EINVAL;
}
}
@@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
if (!*method) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_method_table\n");
+ pr_err("No memory for ib_mad_mgmt_method_table\n");
return -ENOMEM;
}
@@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
if (!vendor) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class_table\n");
goto error1;
}
@@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
if (!vendor_class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class\n");
goto error2;
}
@@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto check_in_use;
}
}
- printk(KERN_ERR PFX "All OUI slots in use\n");
+ dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
goto error3;
check_in_use:
@@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
if (mad_agent->agent.recv_handler)
atomic_inc(&mad_agent->refcount);
else {
- printk(KERN_NOTICE PFX "No receive handler for client "
- "%p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
mad_agent = NULL;
}
}
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
/* Make sure MAD base version is understood */
if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
- printk(KERN_ERR PFX "MAD received with unsupported base "
- "version %d\n", mad->mad_hdr.base_version);
+ pr_err("MAD received with unsupported base version %d\n",
+ mad->mad_hdr.base_version);
goto out;
}
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
return !mad_agent_priv->agent.rmpp_version ||
+ !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE) ||
(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
mad_recv_wc);
if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
if (!mad_send_wr) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- ib_free_recv_mad(mad_recv_wc);
- deref_mad_agent(mad_agent_priv);
- return;
- }
- ib_mark_mad_done(mad_send_wr);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+ && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+ & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ /* user rmpp is in effect
+ * and this is an active RMPP MAD
+ */
+ mad_recv_wc->wc->wr_id = 0;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+ } else {
+ /* not user rmpp, revert to normal behavior and
+ * drop the mad */
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+ } else {
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- /* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ /* Defined behavior is to complete response before request */
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
- mad_send_wc.status = IB_WC_SUCCESS;
- mad_send_wc.vendor_err = 0;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ }
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!response) {
- printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
- "for response buffer\n");
+ dev_err(&port_priv->device->dev,
+ "ib_mad_recv_done_handler no memory for response buffer\n");
goto out;
}
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
mad_agent_priv = mad_send_wr->mad_agent_priv;
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
if (ret == IB_RMPP_RESULT_CONSUMED)
goto done;
@@ -2176,7 +2249,8 @@ retry:
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
&bad_send_wr);
if (ret) {
- printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
wc->status = IB_WC_LOC_QP_OP_ERR;
goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
IB_QP_STATE | IB_QP_CUR_STATE);
kfree(attr);
if (ret)
- printk(KERN_ERR PFX "mad_error_handler - "
- "ib_modify_qp to RTS : %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "mad_error_handler - ib_modify_qp to RTS : %d\n",
+ ret);
else
mark_sends_for_retry(qp_info);
}
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
if (local->mad_priv) {
recv_mad_agent = local->recv_mad_agent;
if (!recv_mad_agent) {
- printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ dev_err(&mad_agent_priv->agent.device->dev,
+ "No receive MAD agent for local completion\n");
free_mad = 1;
goto local_send_completion;
}
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
- if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
switch (ret) {
case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!mad_priv) {
- printk(KERN_ERR PFX "No memory for receive buffer\n");
+ dev_err(&qp_info->port_priv->device->dev,
+ "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof mad_priv->header,
DMA_FROM_DEVICE);
kmem_cache_free(ib_mad_cache, mad_priv);
- printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ dev_err(&qp_info->port_priv->device->dev,
+ "ib_post_recv failed: %d\n", ret);
break;
}
} while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
- printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't kmalloc ib_qp_attr\n");
return -ENOMEM;
}
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "INIT: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to INIT: %d\n",
+ i, ret);
goto out;
}
attr->qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTR: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTR: %d\n",
+ i, ret);
goto out;
}
@@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr->sq_psn = IB_MAD_SEND_Q_PSN;
ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTS: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTS: %d\n",
+ i, ret);
goto out;
}
}
ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
if (ret) {
- printk(KERN_ERR PFX "Failed to request completion "
- "notification: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "Failed to request completion notification: %d\n",
+ ret);
goto out;
}
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
if (ret) {
- printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't post receive WRs\n");
goto out;
}
}
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
struct ib_mad_qp_info *qp_info = qp_context;
/* It's worse than that! He's dead, Jim! */
- printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ dev_err(&qp_info->port_priv->device->dev,
+ "Fatal error (%d) on MAD QP (%d)\n",
event->event, qp_info->qp->qp_num);
}
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
qp_init_attr.event_handler = qp_event_handler;
qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
if (IS_ERR(qp_info->qp)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
- get_spl_qp_index(qp_type));
+ dev_err(&qp_info->port_priv->device->dev,
+ "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
ret = PTR_ERR(qp_info->qp);
goto error;
}
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_port_private\n");
return -ENOMEM;
}
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
if (IS_ERR(port_priv->cq)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
goto error3;
}
port_priv->pd = ib_alloc_pd(device);
if (IS_ERR(port_priv->pd)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error4;
}
port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(port_priv->mr)) {
- printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
ret = PTR_ERR(port_priv->mr);
goto error5;
}
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
ret = ib_mad_port_start(port_priv);
if (ret) {
- printk(KERN_ERR PFX "Couldn't start port\n");
+ dev_err(&device->dev, "Couldn't start port\n");
goto error9;
}
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
for (i = start; i <= end; i++) {
if (ib_mad_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
if (ib_agent_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't open port %d for agents\n", i);
goto error_agent;
}
}
@@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device)
error_agent:
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
error:
i--;
while (i >= start) {
if (ib_agent_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n", i);
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
i--;
}
}
@@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device)
}
for (i = 0; i < num_ports; i++, cur_port++) {
if (ib_agent_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, cur_port);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n",
+ cur_port);
if (ib_mad_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ dev_err(&device->dev, "Couldn't close port %d\n",
+ cur_port);
}
}
@@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void)
SLAB_HWCACHE_ALIGN,
NULL);
if (!ib_mad_cache) {
- printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ pr_err("Couldn't create ib_mad cache\n");
ret = -ENOMEM;
goto error1;
}
@@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void)
INIT_LIST_HEAD(&ib_mad_port_list);
if (ib_register_client(&mad_client)) {
- printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ pr_err("Couldn't register ib_mad client\n");
ret = -EINVAL;
goto error2;
}
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4969c..d1a0b0ee944 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -42,9 +42,6 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
-
-#define PFX "ib_mad: "
-
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf541f5..c38f030f0dc 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
- recv_handler, sa_dev);
+ recv_handler, sa_dev, 0);
if (IS_ERR(sa_dev->port[i].agent))
goto err;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb9910055..928cdd20e2d 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
* SOFTWARE.
*/
+#define pr_fmt(fmt) "user_mad: " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
- if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
- copy_offset = IB_MGMT_MAD_HDR;
- rmpp_active = 0;
- } else {
+
+ if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
- IB_MGMT_RMPP_FLAG_ACTIVE;
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ } else {
+ copy_offset = IB_MGMT_MAD_HDR;
+ rmpp_active = 0;
}
data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad->mad_hdr.tid = *tid;
}
- spin_lock_irq(&file->send_lock);
- ret = is_duplicate(file, packet);
- if (!ret)
+ if (!ib_mad_kernel_rmpp_agent(agent)
+ && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
- spin_unlock_irq(&file->send_lock);
- if (ret) {
- ret = -EINVAL;
- goto err_msg;
+ spin_unlock_irq(&file->send_lock);
+ } else {
+ spin_lock_irq(&file->send_lock);
+ ret = is_duplicate(file, packet);
+ if (!ret)
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_msg;
+ }
}
ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
}
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid QPN %d specified\n",
+ ureq.qpn);
ret = -EINVAL;
goto out;
}
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
found:
if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
ureq.mgmt_class ? &req : NULL,
ureq.rmpp_version,
- send_handler, recv_handler, file);
+ send_handler, recv_handler, file, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
agent = NULL;
@@ -673,10 +694,11 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- printk(KERN_WARNING "user_mad: process %s did not enable "
- "P_Key index support.\n", current->comm);
- printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
- "has info on the new ABI.\n");
+ dev_warn(file->port->dev,
+ "process %s did not enable P_Key index support.\n",
+ current->comm);
+ dev_warn(file->port->dev,
+ " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -694,6 +716,119 @@ out:
return ret;
}
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+ struct ib_user_mad_reg_req2 ureq;
+ struct ib_mad_reg_req req;
+ struct ib_mad_agent *agent = NULL;
+ int agent_id;
+ int ret;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid device\n");
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid QPN %d specified\n",
+ ureq.qpn);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ ret = -EINVAL;
+
+ if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+ (u32 __user *) (arg + offsetof(struct
+ ib_user_mad_reg_req2, flags))))
+ ret = -EFAULT;
+
+ goto out;
+ }
+
+ for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+ if (!__get_agent(file, agent_id))
+ goto found;
+
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
+ ret = -ENOMEM;
+ goto out;
+
+found:
+ if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
+ req.mgmt_class = ureq.mgmt_class;
+ req.mgmt_class_version = ureq.mgmt_class_version;
+ if (ureq.oui & 0xff000000) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ ureq.oui);
+ ret = -EINVAL;
+ goto out;
+ }
+ req.oui[2] = ureq.oui & 0x0000ff;
+ req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+ req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+ memcpy(req.method_mask, ureq.method_mask,
+ sizeof(req.method_mask));
+ }
+
+ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+ ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+ ureq.mgmt_class ? &req : NULL,
+ ureq.rmpp_version,
+ send_handler, recv_handler, file,
+ ureq.flags);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ agent = NULL;
+ goto out;
+ }
+
+ if (put_user(agent_id,
+ (u32 __user *)(arg +
+ offsetof(struct ib_user_mad_reg_req2, id)))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (!file->already_used) {
+ file->already_used = 1;
+ file->use_pkey_index = 1;
+ }
+
+ file->agent[agent_id] = agent;
+ ret = 0;
+
+out:
+ mutex_unlock(&file->mutex);
+
+ if (ret && agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
+
static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
{
struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
default:
return -ENOIOCTLCMD;
}
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
default:
return -ENOIOCTLCMD;
}
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(void)
+static int find_overflow_devnum(struct ib_device *device)
{
int ret;
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ dev_err(&device->dev,
+ "couldn't register dynamic device number\n");
return ret;
}
}
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- devnum = find_overflow_devnum();
+ devnum = find_overflow_devnum(device);
if (devnum < 0)
return -1;
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register device number\n");
+ pr_err("couldn't register device number\n");
goto out;
}
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
- printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
ret = class_create_file(umad_class, &class_attr_abi_version.attr);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ pr_err("couldn't create abi_version attribute\n");
goto out_class;
}
ret = ib_register_client(&umad_client);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ pr_err("couldn't register ib_umad client\n");
goto out_class;
}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a0..643c08a025a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bc..0600c50e621 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
return ret;
}
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_rereg_mr cmd;
+ struct ib_uverbs_rereg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr;
+ struct ib_pd *old_pd;
+ int ret;
+ struct ib_uobject *uobj;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ return -EINVAL;
+
+ if ((cmd.flags & IB_MR_REREG_TRANS) &&
+ (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+ file->ucontext);
+
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ if (cmd.flags & IB_MR_REREG_ACCESS) {
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ goto put_uobjs;
+ }
+
+ if (cmd.flags & IB_MR_REREG_PD) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+ }
+
+ if (atomic_read(&mr->usecnt)) {
+ ret = -EBUSY;
+ goto put_uobj_pd;
+ }
+
+ old_pd = mr->pd;
+ ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd, &udata);
+ if (!ret) {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
+ }
+ } else {
+ goto put_uobj_pd;
+ }
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+ else
+ ret = in_len;
+
+put_uobj_pd:
+ if (cmd.flags & IB_MR_REREG_PD)
+ put_pd_read(pd);
+
+put_uobjs:
+
+ put_uobj_write(mr->uobject);
+
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338..c73b22a257f 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
[IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e8533f7..1b63185b4ad 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
mq->msg_pool.host, dma_unmap_addr(mq, mapping));
}
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
- int msg_size)
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
+ size_t q_size, size_t msg_size)
{
u8 *pool_start;
+ if (q_size > SIZE_MAX / msg_size)
+ return -EINVAL;
+
pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
&mq->host_dma, GFP_KERNEL);
if (!pool_start)
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051af25..c9df0549f51 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
chp = get_chp(dev, qid);
if (chp) {
+ t4_clear_cq_armed(&chp->cq);
spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc02bc..41cd6882b64 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
struct c4iw_cq *schp)
{
int count;
- int flushed;
+ int rq_flushed, sq_flushed;
unsigned long flag;
PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
c4iw_flush_hw_cq(rchp);
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+ rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&rchp->comp_handler_lock, flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
- }
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, flag);
spin_lock(&qhp->lock);
if (schp != rchp)
c4iw_flush_hw_cq(schp);
- flushed = c4iw_flush_sq(qhp);
+ sq_flushed = c4iw_flush_sq(qhp);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&schp->comp_handler_lock, flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+
+ if (schp == rchp) {
+ if (t4_clear_cq_armed(&rchp->cq) &&
+ (rq_flushed || sq_flushed)) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ } else {
+ if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
}
}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa31a8..c04e5134b30 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
return !wq->rq.queue[wq->rq.size].status.db_off;
}
+enum t4_cq_flags {
+ CQ_ARMED = 1,
+};
+
struct t4_cq {
struct t4_cqe *queue;
dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
u16 cidx_inc;
u8 gen;
u8 error;
+ unsigned long flags;
};
+static inline int t4_clear_cq_armed(struct t4_cq *cq)
+{
+ return test_and_clear_bit(CQ_ARMED, &cq->flags);
+}
+
static inline int t4_arm_cq(struct t4_cq *cq, int se)
{
u32 val;
+ set_bit(CQ_ARMED, &cq->flags);
while (cq->cidx_inc > CIDXINC_MASK) {
val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
INGRESSQID(cq->cqid);
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d0424d4..e890e5ba0e0 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
* @dd: the infinipath device
* @pkeys: the PKEY table
*/
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
{
struct ipath_portdata *pd;
int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
}
if (changed) {
u64 pkey;
+ struct ib_event event;
pkey = (u64) dd->ipath_pkeys[0] |
((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
(unsigned long long) pkey);
ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
pkey);
+
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.device = &dd->verbs_dev->ibdev;
+ event.element.port_num = port;
+ ib_dispatch_event(&event);
}
return 0;
}
static int recv_subn_set_pkeytable(struct ib_smp *smp,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u8 port)
{
u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
__be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
for (i = 0; i < n; i++)
q[i] = be16_to_cpu(p[i]);
- if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+ if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
smp->status |= IB_SMP_INVALID_FIELD;
return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
ret = recv_subn_set_portinfo(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_PKEY_TABLE:
- ret = recv_subn_set_pkeytable(smp, ibdev);
+ ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_SM_INFO:
if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad0564ac..82a7dd87089 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0f7027e7db1..e1e558a3d69 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
const struct default_rules *pdefault_rules = default_table;
u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
- for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
- pdefault_rules++) {
+ for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
memset(&field_types, 0, sizeof(field_types));
@@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules(
int size = 0;
int i;
- for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
- sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
int ret;
union ib_flow_spec ib_spec;
switch (pdefault_rules->rules_create_list[i]) {
@@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
+ ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 369da3ca5d6..e8cad3926bf 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index cb2a8727f3f..9b0e80e59b0 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
+ /* Force registering the memory as writable. */
+ /* Used for memory re-registeration. HCA protects the access */
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ access_flags | IB_ACCESS_LOCAL_WRITE, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -183,6 +185,90 @@ err_free:
return ERR_PTR(err);
}
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(mr->device);
+ struct mlx4_ib_mr *mmr = to_mmr(mr);
+ struct mlx4_mpt_entry *mpt_entry;
+ struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
+ int err;
+
+ /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
+ * we assume that the calls can't run concurrently. Otherwise, a
+ * race exists.
+ */
+ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
+
+ if (err)
+ return err;
+
+ if (flags & IB_MR_REREG_PD) {
+ err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
+ to_mpd(pd)->pdn);
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
+ convert_access(mr_access_flags));
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ int shift;
+ int err;
+ int n;
+
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ mmr->umem = ib_umem_get(mr->uobject->context, start, length,
+ mr_access_flags |
+ IB_ACCESS_LOCAL_WRITE,
+ 0);
+ if (IS_ERR(mmr->umem)) {
+ err = PTR_ERR(mmr->umem);
+ mmr->umem = NULL;
+ goto release_mpt_entry;
+ }
+ n = ib_umem_page_count(mmr->umem);
+ shift = ilog2(mmr->umem->page_size);
+
+ mmr->mmr.iova = virt_addr;
+ mmr->mmr.size = length;
+ err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
+ virt_addr, length, n, shift,
+ *pmpt_entry);
+ if (err) {
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+
+ err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
+ if (err) {
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+ }
+
+ /* If we couldn't transfer the MR to the HCA, just remember to
+ * return a failure. But dereg_mr will free the resources.
+ */
+ err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
+
+release_mpt_entry:
+ mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
+
+ return err;
+}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7efe6e3f354..8c574b63d77 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
err = -EINVAL;
*bad_wr = wr;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f457fc5..8881fa376e0 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 19011dbb930..b43456ae124 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -40,7 +40,7 @@
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u"
#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -137,6 +137,7 @@ struct mqe_ctx {
u16 cqe_status;
u16 ext_status;
bool cmd_done;
+ bool fw_error_state;
};
struct ocrdma_hw_mr {
@@ -235,7 +236,10 @@ struct ocrdma_dev {
struct list_head entry;
struct rcu_head rcu;
int id;
- u64 stag_arr[OCRDMA_MAX_STAG];
+ u64 *stag_arr;
+ u8 sl; /* service level */
+ bool pfc_state;
+ atomic_t update_sl;
u16 pvid;
u32 asic_id;
@@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
}
+static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio)
+{
+ return *(pfc + prio);
+}
+
+static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio)
+{
+ return *(app_prio + prio);
+}
+
+static inline u8 ocrdma_is_enabled_and_synced(u32 state)
+{ /* May also be used to interpret TC-state, QCN-state
+ * Appl-state and Logical-link-state in future.
+ */
+ return (state & OCRDMA_STATE_FLAG_ENABLED) &&
+ (state & OCRDMA_STATE_FLAG_SYNC);
+}
+
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index d4cc01f10c0..40f8536c10b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,6 +35,8 @@
#include "ocrdma_ah.h"
#include "ocrdma_hw.h"
+#define OCRDMA_VID_PCP_SHIFT 0xD
+
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, int pdid)
{
@@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
if (vlan_tag && (vlan_tag < 0x1000)) {
eth.eth_type = cpu_to_be16(0x8100);
eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
- vlan_tag |= (attr->sl & 7) << 13;
+ vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
eth.vlan_tag = cpu_to_be16(vlan_tag);
eth_sz = sizeof(struct ocrdma_eth_vlan);
vlan_enabled = true;
@@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ if (atomic_cmpxchg(&dev->update_sl, 1, 0))
+ ocrdma_init_service_level(dev);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 3bbf2010a82..dd35ae558ae 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
cmd->eqn = eq->id;
- cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+ cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe);
ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
cq->dma, PAGE_SIZE_4K);
@@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
{
struct ocrdma_qp *qp = NULL;
struct ocrdma_cq *cq = NULL;
- struct ib_event ib_evt = { 0 };
+ struct ib_event ib_evt;
int cq_event = 0;
int qp_event = 1;
int srq_event = 0;
@@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+ memset(&ib_evt, 0, sizeof(ib_evt));
+
ib_evt.device = &dev->ibdev;
switch (type) {
@@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
break;
+
+ case OCRDMA_ASYNC_EVENT_COS_VALUE:
+ atomic_set(&dev->update_sl, 1);
+ break;
default:
/* Not interested evts. */
break;
@@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
msecs_to_jiffies(30000));
if (status)
return 0;
- else
+ else {
+ dev->mqe_ctx.fw_error_state = true;
+ pr_err("%s(%d) mailbox timeout: fw not responding\n",
+ __func__, dev->id);
return -1;
+ }
}
/* issue a mailbox command on the MQ */
@@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
+ if (dev->mqe_ctx.fw_error_state)
+ goto mbx_err;
ocrdma_post_mqe(dev, mqe);
status = ocrdma_wait_mqe_cmpl(dev);
if (status)
@@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
attr->max_mw = rsp->max_mw;
attr->max_mr = rsp->max_mr;
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
+ rsp->max_mr_size_lo;
attr->max_fmr = 0;
attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
@@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
- dev->hba_port_num = hba_attribs->phy_port;
+ dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
+ OCRDMA_HBA_ATTRB_PTNUM_MASK)
+ >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
strncpy(dev->model_number,
hba_attribs->controller_model_number, 31);
}
@@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
- *lnk_speed = rsp->phys_port_speed;
+ *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+ >> OCRDMA_PHY_PS_SHIFT;
mbx_err:
kfree(cmd);
@@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
goto mbx_err;
rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
- dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.phy_type =
+ (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK);
+ dev->phy.interface_type =
+ (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK)
+ >> OCRDMA_IF_TYPE_SHIFT;
dev->phy.auto_speeds_supported =
- le16_to_cpu(rsp->auto_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK);
dev->phy.fixed_speeds_supported =
- le16_to_cpu(rsp->fixed_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK)
+ >> OCRDMA_FSPEED_SUPP_SHIFT;
mbx_err:
kfree(cmd);
return status;
@@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
- pbes[i].pa_lo = (u32) (pa & 0xffffffff);
- pbes[i].pa_hi = (u32) upper_32_bits(pa);
+ pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff);
+ pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa));
pa += PAGE_SIZE;
}
cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
@@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
dev->av_tbl.pa);
+ dev->av_tbl.va = NULL;
dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
dev->av_tbl.pbl.pa);
kfree(cmd);
@@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
OCRDMA_CREATE_CQ_TYPE_SHIFT;
cq->phase_change = false;
- cmd->cmd.cqe_count = (cq->len / cqe_size);
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size);
} else {
- cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1;
cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
cq->phase_change = true;
}
- cmd->cmd.pd_id = pd_id; /* valid only for v3 */
+ /* pd_id valid only for v3 */
+ cmd->cmd.pdid_cqecnt |= (pd_id <<
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT);
ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status)
@@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
qp->rq_cq = cq;
- if (pd->dpp_enabled && pd->num_dpp_qp) {
+ if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
+ (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
}
@@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
return -EINVAL;
+ if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0))
+ ocrdma_init_service_level(qp->dev);
cmd->params.tclass_sq_psn |=
(ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
@@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+ cmd->params.rnt_rc_sl_fl |=
+ (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
}
return 0;
}
@@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
return status;
}
+static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg)
+{
+ int status = 0;
+ dma_addr_t pa;
+ struct ocrdma_mqe cmd;
+
+ struct ocrdma_get_dcbx_cfg_req *req = NULL;
+ struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge;
+
+ memset(&cmd, 0, sizeof(struct ocrdma_mqe));
+ cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp),
+ sizeof(struct ocrdma_get_dcbx_cfg_req));
+ req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL);
+ if (!req) {
+ status = -ENOMEM;
+ goto mem_err;
+ }
+
+ cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL);
+ mqe_sge->pa_hi = (u32) upper_32_bits(pa);
+ mqe_sge->len = cmd.hdr.pyld_len;
+
+ memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
+ ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
+ OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
+ req->param_type = ptype;
+
+ status = ocrdma_mbx_cmd(dev, &cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req;
+ ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp));
+ memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg));
+
+mbx_err:
+ dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa);
+mem_err:
+ return status;
+}
+
+#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08
+#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05
+
+static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg,
+ u8 *srvc_lvl)
+{
+ int status = -EINVAL, indx, slindx;
+ int ventry_cnt;
+ struct ocrdma_app_parameter *app_param;
+ u8 valid, proto_sel;
+ u8 app_prio, pfc_prio;
+ u16 proto;
+
+ if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) {
+ pr_info("%s ocrdma%d DCBX is disabled\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ goto out;
+ }
+
+ if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) {
+ pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ (ptype > 0 ? "operational" : "admin"),
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ?
+ "enabled" : "disabled",
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ?
+ "" : ", not sync'ed");
+ goto out;
+ } else {
+ pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ }
+
+ ventry_cnt = (dcbxcfg->tcv_aev_opv_st >>
+ OCRDMA_DCBX_APP_ENTRY_SHIFT)
+ & OCRDMA_DCBX_STATE_MASK;
+
+ for (indx = 0; indx < ventry_cnt; indx++) {
+ app_param = &dcbxcfg->app_param[indx];
+ valid = (app_param->valid_proto_app >>
+ OCRDMA_APP_PARAM_VALID_SHIFT)
+ & OCRDMA_APP_PARAM_VALID_MASK;
+ proto_sel = (app_param->valid_proto_app
+ >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT)
+ & OCRDMA_APP_PARAM_PROTO_SEL_MASK;
+ proto = app_param->valid_proto_app &
+ OCRDMA_APP_PARAM_APP_PROTO_MASK;
+
+ if (
+ valid && proto == OCRDMA_APP_PROTO_ROCE &&
+ proto_sel == OCRDMA_PROTO_SELECT_L2) {
+ for (slindx = 0; slindx <
+ OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
+ app_prio = ocrdma_get_app_prio(
+ (u8 *)app_param->app_prio,
+ slindx);
+ pfc_prio = ocrdma_get_pfc_prio(
+ (u8 *)dcbxcfg->pfc_prio,
+ slindx);
+
+ if (app_prio && pfc_prio) {
+ *srvc_lvl = slindx;
+ status = 0;
+ goto out;
+ }
+ }
+ if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) {
+ pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n",
+ dev_name(&dev->nic_info.pdev->dev),
+ dev->id, proto);
+ }
+ }
+ }
+
+out:
+ return status;
+}
+
+void ocrdma_init_service_level(struct ocrdma_dev *dev)
+{
+ int status = 0, indx;
+ struct ocrdma_dcbx_cfg dcbxcfg;
+ u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL;
+ int ptype = OCRDMA_PARAMETER_TYPE_OPER;
+
+ for (indx = 0; indx < 2; indx++) {
+ status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg);
+ if (status) {
+ pr_err("%s(): status=%d\n", __func__, status);
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ status = ocrdma_parse_dcbxcfg_rsp(dev, ptype,
+ &dcbxcfg, &srvc_lvl);
+ if (status) {
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ break;
+ }
+
+ if (status)
+ pr_info("%s ocrdma%d service level default\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ else
+ pr_info("%s ocrdma%d service level %d\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ srvc_lvl);
+
+ dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state);
+ dev->sl = srvc_lvl;
+}
+
int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
{
int i;
@@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
goto conf_err;
status = ocrdma_mbx_get_phy_info(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
status = ocrdma_mbx_get_ctrl_attribs(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
return 0;
+info_attrb_err:
+ ocrdma_mbx_delete_ah_tbl(dev);
conf_err:
ocrdma_destroy_mq(dev);
mq_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index e513f729314..6eed8f19132 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
char *port_speed_string(struct ocrdma_dev *dev);
+void ocrdma_init_service_level(struct ocrdma_dev *);
+
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7c504e07974..256a06bc0b6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
if (!dev->qp_tbl)
goto alloc_err;
}
+
+ dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
+ if (dev->stag_arr == NULL)
+ goto alloc_err;
+
spin_lock_init(&dev->av_tbl.lock);
spin_lock_init(&dev->flush_q_lock);
return 0;
@@ -334,6 +339,7 @@ alloc_err:
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
+ kfree(dev->stag_arr);
kfree(dev->qp_tbl);
kfree(dev->cq_tbl);
kfree(dev->sgid_tbl);
@@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
{
struct ocrdma_dev *dev = dev_get_drvdata(device);
- return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
+}
+
+static ssize_t show_hca_type(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type
};
static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
@@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
}
+static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+ struct in_device *in_dev;
+ union ib_gid gid;
+ in_dev = in_dev_get(net);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ ocrdma_add_sgid(dev, &gid);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+}
+
+static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+ in6_dev = in6_dev_get(net);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ ocrdma_add_sgid(dev, pgid);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
+{
+ struct net_device *net_dev;
+
+ for_each_netdev(&init_net, net_dev) {
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ?
+ rdma_vlan_dev_real_dev(net_dev) : net_dev;
+
+ if (real_dev == dev->nic_info.netdev) {
+ ocrdma_init_ipv4_gids(dev, net_dev);
+ ocrdma_init_ipv6_gids(dev, net_dev);
+ }
+ }
+}
+
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
int status = 0, i;
@@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
+ ocrdma_init_service_level(dev);
+ ocrdma_init_gid_table(dev);
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
@@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev)
return 0;
}
+static void ocrdma_shutdown(struct ocrdma_dev *dev)
+{
+ ocrdma_close(dev);
+ ocrdma_remove(dev);
+}
+
/* event handling via NIC driver ensures that all the NIC specific
* initialization done before RoCE driver notifies
* event to stack.
@@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
case BE_DEV_DOWN:
ocrdma_close(dev);
break;
+ case BE_DEV_SHUTDOWN:
+ ocrdma_shutdown(dev);
+ break;
}
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 96c9ee602ba..904989ec5ea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -44,35 +44,39 @@ enum {
#define OCRDMA_SUBSYS_ROCE 10
enum {
OCRDMA_CMD_QUERY_CONFIG = 1,
- OCRDMA_CMD_ALLOC_PD,
- OCRDMA_CMD_DEALLOC_PD,
-
- OCRDMA_CMD_CREATE_AH_TBL,
- OCRDMA_CMD_DELETE_AH_TBL,
-
- OCRDMA_CMD_CREATE_QP,
- OCRDMA_CMD_QUERY_QP,
- OCRDMA_CMD_MODIFY_QP,
- OCRDMA_CMD_DELETE_QP,
-
- OCRDMA_CMD_RSVD1,
- OCRDMA_CMD_ALLOC_LKEY,
- OCRDMA_CMD_DEALLOC_LKEY,
- OCRDMA_CMD_REGISTER_NSMR,
- OCRDMA_CMD_REREGISTER_NSMR,
- OCRDMA_CMD_REGISTER_NSMR_CONT,
- OCRDMA_CMD_QUERY_NSMR,
- OCRDMA_CMD_ALLOC_MW,
- OCRDMA_CMD_QUERY_MW,
-
- OCRDMA_CMD_CREATE_SRQ,
- OCRDMA_CMD_QUERY_SRQ,
- OCRDMA_CMD_MODIFY_SRQ,
- OCRDMA_CMD_DELETE_SRQ,
-
- OCRDMA_CMD_ATTACH_MCAST,
- OCRDMA_CMD_DETACH_MCAST,
- OCRDMA_CMD_GET_RDMA_STATS,
+ OCRDMA_CMD_ALLOC_PD = 2,
+ OCRDMA_CMD_DEALLOC_PD = 3,
+
+ OCRDMA_CMD_CREATE_AH_TBL = 4,
+ OCRDMA_CMD_DELETE_AH_TBL = 5,
+
+ OCRDMA_CMD_CREATE_QP = 6,
+ OCRDMA_CMD_QUERY_QP = 7,
+ OCRDMA_CMD_MODIFY_QP = 8 ,
+ OCRDMA_CMD_DELETE_QP = 9,
+
+ OCRDMA_CMD_RSVD1 = 10,
+ OCRDMA_CMD_ALLOC_LKEY = 11,
+ OCRDMA_CMD_DEALLOC_LKEY = 12,
+ OCRDMA_CMD_REGISTER_NSMR = 13,
+ OCRDMA_CMD_REREGISTER_NSMR = 14,
+ OCRDMA_CMD_REGISTER_NSMR_CONT = 15,
+ OCRDMA_CMD_QUERY_NSMR = 16,
+ OCRDMA_CMD_ALLOC_MW = 17,
+ OCRDMA_CMD_QUERY_MW = 18,
+
+ OCRDMA_CMD_CREATE_SRQ = 19,
+ OCRDMA_CMD_QUERY_SRQ = 20,
+ OCRDMA_CMD_MODIFY_SRQ = 21,
+ OCRDMA_CMD_DELETE_SRQ = 22,
+
+ OCRDMA_CMD_ATTACH_MCAST = 23,
+ OCRDMA_CMD_DETACH_MCAST = 24,
+
+ OCRDMA_CMD_CREATE_RBQ = 25,
+ OCRDMA_CMD_DESTROY_RBQ = 26,
+
+ OCRDMA_CMD_GET_RDMA_STATS = 27,
OCRDMA_CMD_MAX
};
@@ -103,7 +107,7 @@ enum {
#define OCRDMA_MAX_QP 2048
#define OCRDMA_MAX_CQ 2048
-#define OCRDMA_MAX_STAG 8192
+#define OCRDMA_MAX_STAG 16384
enum {
OCRDMA_DB_RQ_OFFSET = 0xE0,
@@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe {
#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
-#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
+
+enum ocrdma_async_grp5_events {
+ OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
+ OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02,
+ OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03
+};
enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_CQ_ERROR = 0x00,
@@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config {
u32 max_ird_ord_per_qp;
u32 max_shared_ird_ord;
u32 max_mr;
- u32 max_mr_size_lo;
u32 max_mr_size_hi;
+ u32 max_mr_size_lo;
u32 max_num_mr_pbl;
u32 max_mw;
u32 max_fmr;
@@ -580,17 +589,26 @@ enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+enum {
+ OCRDMA_IF_TYPE_MASK = 0xFFFF0000,
+ OCRDMA_IF_TYPE_SHIFT = 0x10,
+ OCRDMA_PHY_TYPE_MASK = 0x0000FFFF,
+ OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000,
+ OCRDMA_FUTURE_DETAILS_SHIFT = 0x10,
+ OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF,
+ OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000,
+ OCRDMA_FSPEED_SUPP_SHIFT = 0x10,
+ OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF
+};
+
struct ocrdma_get_phy_info_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u16 phy_type;
- u16 interface_type;
+ u32 ityp_ptyp;
u32 misc_params;
- u16 ext_phy_details;
- u16 rsvd;
- u16 auto_speeds_supported;
- u16 fixed_speeds_supported;
+ u32 ftrdtl_exphydtl;
+ u32 fspeed_aspeed;
u32 future_use[2];
};
@@ -603,19 +621,34 @@ enum {
OCRDMA_PHY_SPEED_40GBPS = 0x20
};
+enum {
+ OCRDMA_PORT_NUM_MASK = 0x3F,
+ OCRDMA_PT_MASK = 0xC0,
+ OCRDMA_PT_SHIFT = 0x6,
+ OCRDMA_LINK_DUP_MASK = 0x0000FF00,
+ OCRDMA_LINK_DUP_SHIFT = 0x8,
+ OCRDMA_PHY_PS_MASK = 0x00FF0000,
+ OCRDMA_PHY_PS_SHIFT = 0x10,
+ OCRDMA_PHY_PFLT_MASK = 0xFF000000,
+ OCRDMA_PHY_PFLT_SHIFT = 0x18,
+ OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
+ OCRDMA_QOS_LNKSP_SHIFT = 0x10,
+ OCRDMA_LLST_MASK = 0xFF,
+ OCRDMA_PLFC_MASK = 0x00000400,
+ OCRDMA_PLFC_SHIFT = 0x8,
+ OCRDMA_PLRFC_MASK = 0x00000200,
+ OCRDMA_PLRFC_SHIFT = 0x8,
+ OCRDMA_PLTFC_MASK = 0x00000100,
+ OCRDMA_PLTFC_SHIFT = 0x8
+};
struct ocrdma_get_link_speed_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u8 pt_port_num;
- u8 link_duplex;
- u8 phys_port_speed;
- u8 phys_port_fault;
- u16 rsvd1;
- u16 qos_lnk_speed;
- u8 logical_lnk_status;
- u8 rsvd2[3];
+ u32 pflt_pps_ld_pnum;
+ u32 qos_lsp;
+ u32 res_lls;
};
enum {
@@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd {
u32 pgsz_pgcnt;
u32 ev_cnt_flags;
u32 eqn;
- u16 cqe_count;
- u16 pd_id;
+ u32 pdid_cqecnt;
u32 rsvd6;
struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
};
@@ -678,6 +710,10 @@ struct ocrdma_create_cq {
};
enum {
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10
+};
+
+enum {
OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
};
@@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq {
enum {
OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
- OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
OCRDMA_DPP_PAGE_SIZE = 4096
};
@@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp {
struct ocrdma_rx_dbg_stats rx_dbg_stats;
} __packed;
+enum {
+ OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_CV_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000,
+ OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000,
+ OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E,
+ OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_NETFIL_MASK =0xFF
+};
struct mgmt_hba_attribs {
u8 flashrom_version_string[32];
u8 manufacturer_name[32];
u32 supported_modes;
- u32 rsvd0[3];
+ u32 rsvd_eprom_verhi_verlo;
+ u32 mbx_ds_ver;
+ u32 epfw_ds_ver;
u8 ncsi_ver_string[12];
u32 default_extended_timeout;
u8 controller_model_number[32];
@@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs {
u8 driver_version_string[32];
u8 fw_on_flash_version_string[32];
u32 functionalities_supported;
- u16 max_cdblength;
- u8 asic_revision;
- u8 generational_guid[16];
- u8 hba_port_count;
- u16 default_link_down_timeout;
- u8 iscsi_ver_min_max;
- u8 multifunction_device;
- u8 cache_valid;
- u8 hba_status;
- u8 max_domains_supported;
- u8 phy_port;
+ u32 guid0_asicrev_cdblen;
+ u8 generational_guid[12];
+ u32 portcnt_guid15;
+ u32 mfuncdev_iscsi_ldtout;
+ u32 ptpnum_maxdoms_hbast_cv;
u32 firmware_post_status;
u32 hba_mtu[8];
- u32 rsvd1[4];
+ u32 res_asicgen_iscsi_feaures;
+ u32 rsvd1[3];
};
struct mgmt_controller_attrib {
struct mgmt_hba_attribs hba_attribs;
- u16 pci_vendor_id;
- u16 pci_device_id;
- u16 pci_sub_vendor_id;
- u16 pci_sub_system_id;
- u8 pci_bus_number;
- u8 pci_device_number;
- u8 pci_function_number;
- u8 interface_type;
- u64 unique_identifier;
- u32 rsvd0[5];
+ u32 pci_did_vid;
+ u32 pci_ssid_svid;
+ u32 ityp_fnum_devnum_bnum;
+ u32 uid_hi;
+ u32 uid_lo;
+ u32 res_nnetfil;
+ u32 rsvd0[4];
};
struct ocrdma_get_ctrl_attribs_rsp {
@@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp {
struct mgmt_controller_attrib ctrl_attribs;
};
+#define OCRDMA_SUBSYS_DCBX 0x10
+
+enum OCRDMA_DCBX_OPCODE {
+ OCRDMA_CMD_GET_DCBX_CONFIG = 0x01
+};
+
+enum OCRDMA_DCBX_PARAM_TYPE {
+ OCRDMA_PARAMETER_TYPE_ADMIN = 0x00,
+ OCRDMA_PARAMETER_TYPE_OPER = 0x01,
+ OCRDMA_PARAMETER_TYPE_PEER = 0x02
+};
+
+enum OCRDMA_DCBX_APP_PROTO {
+ OCRDMA_APP_PROTO_ROCE = 0x8915
+};
+
+enum OCRDMA_DCBX_PROTO {
+ OCRDMA_PROTO_SELECT_L2 = 0x00,
+ OCRDMA_PROTO_SELECT_L4 = 0x01
+};
+
+enum OCRDMA_DCBX_APP_PARAM {
+ OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10,
+ OCRDMA_APP_PARAM_VALID_MASK = 0xFF,
+ OCRDMA_APP_PARAM_VALID_SHIFT = 0x18
+};
+
+enum OCRDMA_DCBX_STATE_FLAGS {
+ OCRDMA_STATE_FLAG_ENABLED = 0x01,
+ OCRDMA_STATE_FLAG_ADDVERTISED = 0x02,
+ OCRDMA_STATE_FLAG_WILLING = 0x04,
+ OCRDMA_STATE_FLAG_SYNC = 0x08,
+ OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000,
+ OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000
+};
+
+enum OCRDMA_TCV_AEV_OPV_ST {
+ OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF,
+ OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18,
+ OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10,
+ OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08,
+ OCRDMA_DCBX_STATE_MASK = 0xFF
+};
+
+struct ocrdma_app_parameter {
+ u32 valid_proto_app;
+ u32 oui;
+ u32 app_prio[2];
+};
+
+struct ocrdma_dcbx_cfg {
+ u32 tcv_aev_opv_st;
+ u32 tc_state;
+ u32 pfc_state;
+ u32 qcn_state;
+ u32 appl_state;
+ u32 ll_state;
+ u32 tc_bw[2];
+ u32 tc_prio[8];
+ u32 pfc_prio[2];
+ struct ocrdma_app_parameter app_param[15];
+};
+
+struct ocrdma_get_dcbx_cfg_req {
+ struct ocrdma_mbx_hdr hdr;
+ u32 param_type;
+} __packed;
+
+struct ocrdma_get_dcbx_cfg_rsp {
+ struct ocrdma_mbx_rsp hdr;
+ struct ocrdma_dcbx_cfg cfg;
+} __packed;
#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index edf6211d84b..acb434d1690 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = dev->attr.max_mr_size;
attr->page_size_cap = 0xffff000;
attr->vendor_id = dev->nic_info.pdev->vendor;
attr->vendor_part_id = dev->nic_info.pdev->device;
- attr->hw_ver = 0;
+ attr->hw_ver = dev->asic_id;
attr->max_qp = dev->attr.max_qp;
attr->max_ah = OCRDMA_MAX_AH;
attr->max_qp_wr = dev->attr.max_wqe;
@@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
pd->dpp_enabled =
ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
pd->num_dpp_qp =
- pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+ pd->dpp_enabled ? (dev->nic_info.db_page_size /
+ dev->attr.wqe_size) : 0;
}
retry:
@@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
struct ocrdma_pd *pd = uctx->cntxt_pd;
struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
- BUG_ON(uctx->pd_in_use);
+ if (uctx->pd_in_use) {
+ pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+ __func__, dev->id, pd->id);
+ }
uctx->cntxt_pd = NULL;
status = _ocrdma_dealloc_pd(dev, pd);
return status;
@@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
+
+ /* Don't stop cleanup, in case FW is unresponsive */
+ if (dev->mqe_ctx.fw_error_state) {
+ status = 0;
+ pr_err("%s(%d) fw not responding.\n",
+ __func__, dev->id);
+ }
return status;
}
@@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
while (wr) {
+ if (qp->qp_type == IB_QPT_UD &&
+ (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)) {
+ *bad_wr = wr;
+ status = -EINVAL;
+ break;
+ }
if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
wr->num_sge > qp->sq.max_sges) {
*bad_wr = wr;
@@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
*stop = true;
expand = false;
}
+ } else if (is_hw_sq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
@@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
*stop = true;
expand = false;
}
+ } else if (is_hw_rq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e5740..636be117b57 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
ibp = &dd->pport[p].ibport_data;
agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c639f90cfda..3edce617c31 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,7 +86,6 @@ enum {
IPOIB_FLAG_INITIALIZED = 1,
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
- IPOIB_PKEY_STOP = 4,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
@@ -312,7 +311,6 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
@@ -532,8 +531,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf);
void ipoib_setup(struct net_device *dev);
-void ipoib_pkey_poll(struct work_struct *work);
-int ipoib_pkey_dev_delay_open(struct net_device *dev);
+void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616..6bd5740e269 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (priv->mcg_dentry)
- debugfs_remove(priv->mcg_dentry);
- if (priv->path_dentry)
- debugfs_remove(priv->path_dentry);
+ debugfs_remove(priv->mcg_dentry);
+ debugfs_remove(priv->path_dentry);
}
int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6a7003ddb0b..72626c34817 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
- ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
return -1;
}
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev);
if (ret) {
@@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev, flush);
return -1;
}
-static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- u16 pkey_index = 0;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+ if (!(priv->pkey & 0x7fff) ||
+ ib_find_pkey(priv->ca, priv->port, priv->pkey,
+ &priv->pkey_index))
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
else
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- /* Shutdown the P_Key thread if still active */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work_sync(&priv->pkey_poll_task);
- mutex_unlock(&pkey_mutex);
- }
-
ipoib_mcast_stop_thread(dev, flush);
ipoib_mcast_dev_flush(dev);
@@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
+ if (ipoib_ib_dev_open(dev, 1)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
return 1;
}
+/*
+ * returns 0 if pkey value was found in a different slot.
+ */
+static inline int update_child_pkey(struct ipoib_dev_priv *priv)
+{
+ u16 old_index = priv->pkey_index;
+
+ priv->pkey_index = 0;
+ ipoib_pkey_dev_check_presence(priv->dev);
+
+ if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+ (old_index == priv->pkey_index))
+ return 1;
+ return 0;
+}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level)
{
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
- u16 new_index;
int result;
down_read(&priv->vlan_rwsem);
@@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
up_read(&priv->vlan_rwsem);
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
- /* for non-child devices must check/update the pkey value here */
- if (level == IPOIB_FLUSH_HEAVY &&
- !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- update_parent_pkey(priv);
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
+ level != IPOIB_FLUSH_HEAVY) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
+ /* interface is down. update pkey and leave. */
+ if (level == IPOIB_FLUSH_HEAVY) {
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+ update_parent_pkey(priv);
+ else
+ update_child_pkey(priv);
+ }
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return;
}
@@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
* (parent) devices should always takes what present in pkey index 0
*/
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- ipoib_ib_dev_down(dev, 0);
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_pkey_dev_delay_open(dev))
- return;
- }
- /* restart QP only if P_Key index is changed */
- if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
- new_index == priv->pkey_index) {
+ result = update_child_pkey(priv);
+ if (result) {
+ /* restart QP only if P_Key index is changed */
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
- priv->pkey_index = new_index;
+
} else {
result = update_parent_pkey(priv);
/* restart QP only if P_Key value changed */
@@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev, 0);
if (level == IPOIB_FLUSH_HEAVY) {
- ipoib_ib_dev_stop(dev, 0);
- ipoib_ib_dev_open(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ ipoib_ib_dev_stop(dev, 0);
+ if (ipoib_ib_dev_open(dev, 0) != 0)
+ return;
+ if (netif_queue_stopped(dev))
+ netif_start_queue(dev);
}
/*
@@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
ipoib_transport_dev_cleanup(dev);
}
-/*
- * Delayed P_Key Assigment Interim Support
- *
- * The following is initial implementation of delayed P_Key assigment
- * mechanism. It is using the same approach implemented for the multicast
- * group join. The single goal of this implementation is to quickly address
- * Bug #2507. This implementation will probably be removed when the P_Key
- * change async notification is available.
- */
-
-void ipoib_pkey_poll(struct work_struct *work)
-{
- struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
- struct net_device *dev = priv->dev;
-
- ipoib_pkey_dev_check_presence(dev);
-
- if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
- ipoib_open(dev);
- else {
- mutex_lock(&pkey_mutex);
- if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- }
-}
-
-int ipoib_pkey_dev_delay_open(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- /* Look for the interface pkey value in the IB Port P_Key table and */
- /* set the interface pkey assigment flag */
- ipoib_pkey_dev_check_presence(dev);
- /* P_Key value not assigned yet - start polling */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- clear_bit(IPOIB_PKEY_STOP, &priv->flags);
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- return 1;
- }
-
- return 0;
-}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4e675f4fecc..1310acf6bf9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_pkey_dev_delay_open(dev))
- return 0;
-
- if (ipoib_ib_dev_open(dev))
+ if (ipoib_ib_dev_open(dev, 1)) {
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+ return 0;
goto err_disable;
+ }
if (ipoib_ib_dev_up(dev))
goto err_stop;
@@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index eb7973957a6..61ee91d8838 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
- ep = iscsi_create_endpoint(sizeof(*ib_conn));
+ ep = iscsi_create_endpoint(0);
if (!ep)
return ERR_PTR(-ENOMEM);
- ib_conn = ep->dd_data;
+ ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
+ if (!ib_conn) {
+ err = -ENOMEM;
+ goto failure;
+ }
+
+ ep->dd_data = ib_conn;
ib_conn->ep = ep;
iser_conn_init(ib_conn);
- err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
- non_blocking);
+ err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
if (err)
- return ERR_PTR(err);
+ goto failure;
return ep;
+failure:
+ iscsi_destroy_endpoint(ep);
+ return ERR_PTR(err);
}
static int
@@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
int rc;
ib_conn = ep->dd_data;
- rc = wait_event_interruptible_timeout(ib_conn->wait,
- ib_conn->state == ISER_CONN_UP,
- msecs_to_jiffies(timeout_ms));
-
+ rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
+ msecs_to_jiffies(timeout_ms));
/* if conn establishment failed, return error code to iscsi */
- if (!rc &&
- (ib_conn->state == ISER_CONN_TERMINATING ||
- ib_conn->state == ISER_CONN_DOWN))
- rc = -1;
+ if (rc == 0) {
+ mutex_lock(&ib_conn->state_mutex);
+ if (ib_conn->state == ISER_CONN_TERMINATING ||
+ ib_conn->state == ISER_CONN_DOWN)
+ rc = -1;
+ mutex_unlock(&ib_conn->state_mutex);
+ }
iser_info("ib conn %p rc = %d\n", ib_conn, rc);
@@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
ib_conn = ep->dd_data;
iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
+ mutex_lock(&ib_conn->state_mutex);
iser_conn_terminate(ib_conn);
/*
- * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
- * call and ISER_CONN_DOWN state before freeing the iser resources.
- * otherwise we are safe to free resources immediately.
+ * if iser_conn and iscsi_conn are bound, we must wait for
+ * iscsi_conn_stop and flush errors completion before freeing
+ * the iser resources. Otherwise we are safe to free resources
+ * immediately.
*/
if (ib_conn->iscsi_conn) {
INIT_WORK(&ib_conn->release_work, iser_release_work);
queue_work(release_wq, &ib_conn->release_work);
+ mutex_unlock(&ib_conn->state_mutex);
} else {
+ ib_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
}
+ iscsi_destroy_endpoint(ep);
}
static umode_t iser_attr_is_visible(int param_type, int param)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 97cd385bf7f..c877dad381c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -326,7 +326,6 @@ struct iser_conn {
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- wait_queue_head_t wait; /* waitq for conn/disconn */
unsigned qp_max_recv_dtos; /* num of rx buffers */
unsigned qp_max_recv_dtos_mask; /* above minus 1 */
unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
@@ -335,6 +334,9 @@ struct iser_conn {
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
struct completion stop_completion;
+ struct mutex state_mutex;
+ struct completion flush_completion;
+ struct completion up_completion;
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -448,8 +450,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
int non_blocking);
int iser_reg_page_vec(struct iser_conn *ib_conn,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ea01075f9f9..3ef167f97d6 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -491,10 +491,9 @@ out_err:
}
/**
- * releases the QP objects, returns 0 on success,
- * -1 on failure
+ * releases the QP object
*/
-static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
+static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
{
int cq_index;
BUG_ON(ib_conn == NULL);
@@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
}
ib_conn->qp = NULL;
-
- return 0;
}
/**
@@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device)
mutex_unlock(&ig.device_list_mutex);
}
+/**
+ * Called with state mutex held
+ **/
static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
enum iser_ib_conn_state comp,
enum iser_ib_conn_state exch)
{
int ret;
- spin_lock_bh(&ib_conn->lock);
if ((ret = (ib_conn->state == comp)))
ib_conn->state = exch;
- spin_unlock_bh(&ib_conn->lock);
return ret;
}
void iser_release_work(struct work_struct *work)
{
struct iser_conn *ib_conn;
+ int rc;
ib_conn = container_of(work, struct iser_conn, release_work);
/* wait for .conn_stop callback */
- wait_for_completion(&ib_conn->stop_completion);
+ rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
+ WARN_ON(rc == 0);
/* wait for the qp`s post send and post receive buffers to empty */
- wait_event_interruptible(ib_conn->wait,
- ib_conn->state == ISER_CONN_DOWN);
+ rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
+ WARN_ON(rc == 0);
+
+ ib_conn->state = ISER_CONN_DOWN;
+
+ mutex_lock(&ib_conn->state_mutex);
+ ib_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
}
@@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn)
{
struct iser_device *device = ib_conn->device;
- BUG_ON(ib_conn->state == ISER_CONN_UP);
-
mutex_lock(&ig.connlist_mutex);
list_del(&ib_conn->conn_list);
mutex_unlock(&ig.connlist_mutex);
+
+ mutex_lock(&ib_conn->state_mutex);
+ BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+
iser_free_rx_descriptors(ib_conn);
iser_free_ib_conn_res(ib_conn);
ib_conn->device = NULL;
/* on EVENT_ADDR_ERROR there's no device yet for this conn */
if (device != NULL)
iser_device_try_release(device);
+ mutex_unlock(&ib_conn->state_mutex);
+
/* if cma handler context, the caller actually destroy the id */
if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
- iscsi_destroy_endpoint(ib_conn->ep);
+ kfree(ib_conn);
}
/**
@@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
ib_conn,err);
}
+/**
+ * Called with state mutex held
+ **/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
ib_conn = (struct iser_conn *)cma_id->context;
-
ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
}
+/**
+ * Called with state mutex held
+ **/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
struct iser_conn *ib_conn;
int ret;
+ ib_conn = (struct iser_conn *)cma_id->context;
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
@@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
return;
}
- ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->device = device;
/* connection T10-PI support */
@@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
}
}
+/**
+ * Called with state mutex held
+ **/
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
int ret;
struct iser_cm_hdr req_hdr;
+ struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
+ struct iser_device *device = ib_conn->device;
+
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
if (ret)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 4;
+ conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
@@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
+ ib_conn = (struct iser_conn *)cma_id->context;
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
- ib_conn = (struct iser_conn *)cma_id->context;
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
- wake_up_interruptible(&ib_conn->wait);
+ ib_conn->state = ISER_CONN_UP;
+ complete(&ib_conn->up_completion);
}
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
@@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
iser_err("iscsi_iser connection isn't bound\n");
}
- /* Complete the termination process if no posts are pending */
+ /* Complete the termination process if no posts are pending. This code
+ * block also exists in iser_handle_comp_error(), but it is needed here
+ * for cases of no flushes at all, e.g. discovery over rdma.
+ */
if (ib_conn->post_recv_buf_count == 0 &&
(atomic_read(&ib_conn->post_send_buf_count) == 0)) {
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ complete(&ib_conn->flush_completion);
}
}
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
+ struct iser_conn *ib_conn;
+
+ ib_conn = (struct iser_conn *)cma_id->context;
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
+ mutex_lock(&ib_conn->state_mutex);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
iser_addr_handler(cma_id);
@@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
iser_disconnected_handler(cma_id);
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
+ mutex_unlock(&ib_conn->state_mutex);
return 0;
}
void iser_conn_init(struct iser_conn *ib_conn)
{
ib_conn->state = ISER_CONN_INIT;
- init_waitqueue_head(&ib_conn->wait);
ib_conn->post_recv_buf_count = 0;
atomic_set(&ib_conn->post_send_buf_count, 0);
init_completion(&ib_conn->stop_completion);
+ init_completion(&ib_conn->flush_completion);
+ init_completion(&ib_conn->up_completion);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
+ mutex_init(&ib_conn->state_mutex);
}
/**
@@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn)
* sleeps until the connection is established or rejected
*/
int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
int non_blocking)
{
- struct sockaddr *src, *dst;
int err = 0;
- sprintf(ib_conn->name, "%pI4:%d",
- &dst_addr->sin_addr.s_addr, dst_addr->sin_port);
+ mutex_lock(&ib_conn->state_mutex);
+
+ sprintf(ib_conn->name, "%pISp", dst_addr);
+
+ iser_info("connecting to: %s\n", ib_conn->name);
/* the device is known only --after-- address resolution */
ib_conn->device = NULL;
- iser_info("connecting to: %pI4, port 0x%x\n",
- &dst_addr->sin_addr, dst_addr->sin_port);
-
ib_conn->state = ISER_CONN_PENDING;
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
@@ -837,23 +877,21 @@ int iser_connect(struct iser_conn *ib_conn,
goto id_failure;
}
- src = (struct sockaddr *)src_addr;
- dst = (struct sockaddr *)dst_addr;
- err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
+ err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
if (err) {
iser_err("rdma_resolve_addr failed: %d\n", err);
goto addr_failure;
}
if (!non_blocking) {
- wait_event_interruptible(ib_conn->wait,
- (ib_conn->state != ISER_CONN_PENDING));
+ wait_for_completion_interruptible(&ib_conn->up_completion);
if (ib_conn->state != ISER_CONN_UP) {
err = -EIO;
goto connect_failure;
}
}
+ mutex_unlock(&ib_conn->state_mutex);
mutex_lock(&ig.connlist_mutex);
list_add(&ib_conn->conn_list, &ig.connlist);
@@ -865,6 +903,7 @@ id_failure:
addr_failure:
ib_conn->state = ISER_CONN_DOWN;
connect_failure:
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
return err;
}
@@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
if (ib_conn->post_recv_buf_count == 0 &&
atomic_read(&ib_conn->post_send_buf_count) == 0) {
- /* getting here when the state is UP means that the conn is *
- * being terminated asynchronously from the iSCSI layer's *
- * perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
+ /**
+ * getting here when the state is UP means that the conn is
+ * being terminated asynchronously from the iSCSI layer's
+ * perspective. It is safe to peek at the connection state
+ * since iscsi_conn_failure is allowed to be called twice.
+ **/
+ if (ib_conn->state == ISER_CONN_UP)
iscsi_conn_failure(ib_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
/* no more non completed posts to the QP, complete the
* termination process w.o worrying on disconnect event */
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ complete(&ib_conn->flush_completion);
}
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b4297..62d2a18e1b4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
+static struct workqueue_struct *srp_remove_wq;
static struct ib_client srp_client = {
.name = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
spin_unlock_irq(&target->lock);
if (changed)
- queue_work(system_long_wq, &target->remove_work);
+ queue_work(srp_remove_wq, &target->remove_work);
return changed;
}
@@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
SCSI_SENSE_BUFFERSIZE));
}
- if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
- scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
- else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+ if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
+ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
srp_free_req(target, req, scmnd,
be32_to_cpu(rsp->req_lim_delta));
@@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device)
spin_unlock(&host->target_lock);
/*
- * Wait for target port removal tasks.
+ * Wait for tl_err and target port removal tasks.
*/
flush_workqueue(system_long_wq);
+ flush_workqueue(srp_remove_wq);
kfree(host);
}
@@ -3313,16 +3319,22 @@ static int __init srp_init_module(void)
indirect_sg_entries = cmd_sg_entries;
}
+ srp_remove_wq = create_workqueue("srp_remove");
+ if (!srp_remove_wq) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = -ENOMEM;
ib_srp_transport_template =
srp_attach_transport(&ib_srp_transport_functions);
if (!ib_srp_transport_template)
- return -ENOMEM;
+ goto destroy_wq;
ret = class_register(&srp_class);
if (ret) {
pr_err("couldn't register class infiniband_srp\n");
- srp_release_transport(ib_srp_transport_template);
- return ret;
+ goto release_tr;
}
ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3342,22 @@ static int __init srp_init_module(void)
ret = ib_register_client(&srp_client);
if (ret) {
pr_err("couldn't register IB client\n");
- srp_release_transport(ib_srp_transport_template);
- ib_sa_unregister_client(&srp_sa_client);
- class_unregister(&srp_class);
- return ret;
+ goto unreg_sa;
}
- return 0;
+out:
+ return ret;
+
+unreg_sa:
+ ib_sa_unregister_client(&srp_sa_client);
+ class_unregister(&srp_class);
+
+release_tr:
+ srp_release_transport(ib_srp_transport_template);
+
+destroy_wq:
+ destroy_workqueue(srp_remove_wq);
+ goto out;
}
static void __exit srp_cleanup_module(void)
@@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void)
ib_sa_unregister_client(&srp_sa_client);
class_unregister(&srp_class);
srp_release_transport(ib_srp_transport_template);
+ destroy_workqueue(srp_remove_wq);
}
module_init(srp_init_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f2788b1..d28a8c284da 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
case IB_EVENT_PKEY_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
+ case IB_EVENT_GID_CHANGE:
/* Refresh port data asynchronously. */
if (event->element.port_num <= sdev->device->phys_port_cnt) {
sport = &sdev->port[event->element.port_num - 1];
@@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
&reg_req, 0,
srpt_mad_send_handler,
srpt_mad_recv_handler,
- sport);
+ sport, 0);
if (IS_ERR(sport->mad_agent)) {
ret = PTR_ERR(sport->mad_agent);
sport->mad_agent = NULL;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 443d03fbac4..8eeab72b93e 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
mutex_unlock(&ca->set->bucket_lock);
blkdev_issue_discard(ca->bdev,
bucket_to_sector(ca->set, bucket),
- ca->sb.block_size, GFP_KERNEL, 0);
+ ca->sb.bucket_size, GFP_KERNEL, 0);
mutex_lock(&ca->set->bucket_lock);
}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d2ebcf32309..04f7bc28ef8 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,9 +477,13 @@ struct gc_stat {
* CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
* we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
* flushing dirty data).
+ *
+ * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * replay is complete.
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
+#define CACHE_SET_RUNNING 2
struct cache_set {
struct closure cl;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 54541641530..646fe85261c 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
{
uint64_t start_time;
bool used_mempool = false;
- struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
+ struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
order);
if (!out) {
struct page *outp;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 5f6728d5d4d..ae964624efb 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
{
return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
KEY_PTRS(l) == KEY_PTRS(r) &&
- KEY_CSUM(l) == KEY_CSUM(l));
+ KEY_CSUM(l) == KEY_CSUM(r));
}
/* Keylists */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7347b610096..00cde40db57 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,9 +117,9 @@
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
- struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
+ struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+ _w, b); \
if (!IS_ERR(_child)) { \
- _child->parent = (b); \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
rw_unlock(_w, _child); \
} else \
@@ -142,7 +142,6 @@
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) { \
- _b->parent = NULL; \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
@@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
struct bset *i = btree_bset_first(b);
struct btree_iter *iter;
- iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+ iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->used = 0;
@@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
bset_sector_offset(&b->keys, i));
- if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
+ if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
int j;
struct bio_vec *bv;
void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -967,7 +966,8 @@ err:
* level and op->lock.
*/
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
- struct bkey *k, int level, bool write)
+ struct bkey *k, int level, bool write,
+ struct btree *parent)
{
int i = 0;
struct btree *b;
@@ -1002,6 +1002,7 @@ retry:
BUG_ON(b->level != level);
}
+ b->parent = parent;
b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@@ -1022,15 +1023,16 @@ retry:
return b;
}
-static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
+static void btree_node_prefetch(struct btree *parent, struct bkey *k)
{
struct btree *b;
- mutex_lock(&c->bucket_lock);
- b = mca_alloc(c, NULL, k, level);
- mutex_unlock(&c->bucket_lock);
+ mutex_lock(&parent->c->bucket_lock);
+ b = mca_alloc(parent->c, NULL, k, parent->level - 1);
+ mutex_unlock(&parent->c->bucket_lock);
if (!IS_ERR_OR_NULL(b)) {
+ b->parent = parent;
bch_btree_node_read(b);
rw_unlock(true, b);
}
@@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
mutex_unlock(&b->c->bucket_lock);
}
-struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
- int level)
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ int level, bool wait,
+ struct btree *parent)
{
BKEY_PADDED(key) k;
struct btree *b = ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
retry:
- if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
+ if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
goto err;
bkey_put(c, &k.key);
@@ -1085,6 +1088,7 @@ retry:
}
b->accessed = 1;
+ b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
mutex_unlock(&c->bucket_lock);
@@ -1096,14 +1100,21 @@ err_free:
err:
mutex_unlock(&c->bucket_lock);
- trace_bcache_btree_node_alloc_fail(b);
+ trace_bcache_btree_node_alloc_fail(c);
return b;
}
+static struct btree *bch_btree_node_alloc(struct cache_set *c,
+ struct btree_op *op, int level,
+ struct btree *parent)
+{
+ return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
+}
+
static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
- struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
+ struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
BUG_ON(btree_bset_first(new_nodes[0])->keys);
btree_node_free(new_nodes[0]);
rw_unlock(true, new_nodes[0]);
+ new_nodes[0] = NULL;
for (i = 0; i < nodes; i++) {
if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
- true);
+ true, b);
if (IS_ERR(r->b)) {
ret = PTR_ERR(r->b);
break;
@@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
if (k)
- btree_node_prefetch(b->c, k, b->level - 1);
+ btree_node_prefetch(b, k);
if (p)
ret = btree(check_recurse, p, b, op);
@@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
- n2 = bch_btree_node_alloc(b->c, op, b->level);
+ n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (IS_ERR(n2))
goto err_free1;
if (!b->parent) {
- n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
+ n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
if (IS_ERR(n3))
goto err_free2;
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 91dfa5e6968..5c391fa01be 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
-struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
+struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
+ int, bool, struct btree *);
struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
- struct bkey *, int, bool);
+ struct bkey *, int, bool, struct btree *);
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 3a0de4cf977..243de0bf15c 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -474,9 +474,8 @@ out:
return false;
}
-static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
- struct btree *b = container_of(bk, struct btree, keys);
char buf[80];
if (!KEY_SIZE(k))
@@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
if (KEY_SIZE(k) > KEY_OFFSET(k))
goto bad;
- if (__ptr_invalid(b->c, k))
+ if (__ptr_invalid(c, k))
goto bad;
return false;
bad:
bch_extent_to_text(buf, sizeof(buf), k);
- cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+ cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
return true;
}
+static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+ struct btree *b = container_of(bk, struct btree, keys);
+ return __bch_extent_invalid(b->c, k);
+}
+
static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
unsigned ptr)
{
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e4e23409782..e2ed54054e7 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -9,5 +9,6 @@ struct cache_set;
void bch_extent_to_text(char *, size_t, const struct bkey *);
bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 59e82021b5b..fe080ad0e55 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -7,6 +7,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
+#include "extents.h"
#include <trace/events/bcache.h>
@@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
if (read_bucket(l))
goto bsearch;
- if (list_empty(list))
+ /* no journal entries on this device? */
+ if (l == ca->sb.njournal_buckets)
continue;
bsearch:
+ BUG_ON(list_empty(list));
+
/* Binary search */
- m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+ m = l;
+ r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
pr_debug("starting binary search, l %u r %u", l, r);
while (l + 1 < r) {
@@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
for (k = i->j.start;
k < bset_bkey_last(&i->j);
- k = bkey_next(k)) {
- unsigned j;
+ k = bkey_next(k))
+ if (!__bch_extent_invalid(c, k)) {
+ unsigned j;
- for (j = 0; j < KEY_PTRS(k); j++)
- if (ptr_available(c, k, j))
- atomic_inc(&PTR_BUCKET(c, k, j)->pin);
+ for (j = 0; j < KEY_PTRS(k); j++)
+ if (ptr_available(c, k, j))
+ atomic_inc(&PTR_BUCKET(c, k, j)->pin);
- bch_initial_mark_key(c, 0, k);
- }
+ bch_initial_mark_key(c, 0, k);
+ }
}
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15fff4f68a7..62e6e98186b 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
{
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- trace_bcache_write(op->bio, op->writeback, op->bypass);
+ trace_bcache_write(op->c, op->inode, op->bio,
+ op->writeback, op->bypass);
bch_keylist_init(&op->insert_keys);
bio_get(op->bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 926ded8ccbf..d4713d098a3 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
unsigned id)
{
- BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
-
d->id = id;
d->c = c;
c->devices[id] = d;
@@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
list_move(&dc->list, &uncached_devices);
clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
+ clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
mutex_unlock(&bch_register_lock);
@@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
*/
atomic_set(&dc->count, 1);
+ if (bch_cached_dev_writeback_start(dc))
+ return -ENOMEM;
+
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(dc);
atomic_set(&dc->has_dirty, 1);
@@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl)
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
cancel_delayed_work_sync(&dc->writeback_rate_update);
- kthread_stop(dc->writeback_thread);
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ kthread_stop(dc->writeback_thread);
mutex_lock(&bch_register_lock);
@@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
- if (!IS_ERR_OR_NULL(dc->bdev)) {
- if (dc->bdev->bd_disk)
- blk_sync_queue(bdev_get_queue(dc->bdev));
-
+ if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- }
wake_up(&unregister_wait);
@@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
static void flash_dev_free(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ mutex_lock(&bch_register_lock);
bcache_device_free(d);
+ mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
}
@@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ mutex_lock(&bch_register_lock);
bcache_device_unlink(d);
+ mutex_unlock(&bch_register_lock);
kobject_del(&d->kobj);
continue_at(cl, flash_dev_free, system_wq);
}
@@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
if (test_bit(CACHE_SET_STOPPING, &c->flags))
return -EINTR;
+ if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+ return -EPERM;
+
u = uuid_find_empty(c);
if (!u) {
pr_err("Can't create volume, no room for UUID");
@@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
bch_journal_free(c);
for_each_cache(ca, c, i)
- if (ca)
+ if (ca) {
+ ca->set = NULL;
+ c->cache[ca->sb.nr_this_dev] = NULL;
kobject_put(&ca->kobj);
+ }
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
if (ca->alloc_thread)
kthread_stop(ca->alloc_thread);
- cancel_delayed_work_sync(&c->journal.work);
- /* flush last journal entry if needed */
- c->journal.work.work.func(&c->journal.work.work);
+ if (c->journal.cur) {
+ cancel_delayed_work_sync(&c->journal.work);
+ /* flush last journal entry if needed */
+ c->journal.work.work.func(&c->journal.work.work);
+ }
closure_return(cl);
}
@@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
- c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
+ c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "cannot allocate new btree root";
- c->root = bch_btree_node_alloc(c, NULL, 0);
+ c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
flash_devs_run(c);
+ set_bit(CACHE_SET_RUNNING, &c->flags);
return;
err:
closure_sync(&cl);
@@ -1760,6 +1772,7 @@ found:
pr_debug("set version = %llu", c->sb.version);
}
+ kobject_get(&ca->kobj);
ca->set = c;
ca->set->cache[ca->sb.nr_this_dev] = ca;
c->cache_by_alloc[c->caches_loaded++] = ca;
@@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
struct cache *ca = container_of(kobj, struct cache, kobj);
unsigned i;
- if (ca->set)
+ if (ca->set) {
+ BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
ca->set->cache[ca->sb.nr_this_dev] = NULL;
+ }
bio_split_pool_free(&ca->bio_split_hook);
@@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
if (ca->sb_bio.bi_inline_vecs[0].bv_page)
put_page(ca->sb_bio.bi_io_vec[0].bv_page);
- if (!IS_ERR_OR_NULL(ca->bdev)) {
- blk_sync_queue(bdev_get_queue(ca->bdev));
+ if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- }
kfree(ca);
module_put(THIS_MODULE);
@@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
}
static void register_cache(struct cache_sb *sb, struct page *sb_page,
- struct block_device *bdev, struct cache *ca)
+ struct block_device *bdev, struct cache *ca)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
@@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
pr_info("registered cache device %s", bdevname(bdev, name));
+out:
+ kobject_put(&ca->kobj);
return;
err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
- kobject_put(&ca->kobj);
+ goto out;
}
/* Global interfaces/init */
@@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
bdev = lookup_bdev(strim(path));
+ mutex_lock(&bch_register_lock);
if (!IS_ERR(bdev) && bch_is_open(bdev))
err = "device already registered";
else
err = "device busy";
+ mutex_unlock(&bch_register_lock);
}
goto err;
}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ac7d0d1f70d..98df7572b5f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -416,8 +416,8 @@ do { \
average_frequency, frequency_units); \
__print_time_stat(stats, name, \
average_duration, duration_units); \
- __print_time_stat(stats, name, \
- max_duration, duration_units); \
+ sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
+ div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
\
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
? div_s64(local_clock() - (stats)->last, \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f4300e4c011..f1986bcd1bf 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
if (KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)
while (!kthread_should_stop() && delay)
- delay = schedule_timeout_uninterruptible(delay);
+ delay = schedule_timeout_interruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
@@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_uninterruptible(delay);
+ delay = schedule_timeout_interruptible(delay);
}
}
@@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
}
-int bch_cached_dev_writeback_init(struct cached_dev *dc)
+void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
sema_init(&dc->in_flight, 64);
init_rwsem(&dc->writeback_lock);
@@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_d_term = 30;
dc->writeback_rate_p_term_inverse = 6000;
+ INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
+}
+
+int bch_cached_dev_writeback_start(struct cached_dev *dc)
+{
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
if (IS_ERR(dc->writeback_thread))
return PTR_ERR(dc->writeback_thread);
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
+ bch_writeback_queue(dc);
+
return 0;
}
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e2f8598937a..0a9dab187b7 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
void bch_sectors_dirty_init(struct cached_dev *dc);
-int bch_cached_dev_writeback_init(struct cached_dev *);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+int bch_cached_dev_writeback_start(struct cached_dev *);
#endif
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d2899e7eb3a..06709257add 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
- disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
+ disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0);
@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
bool may_format_device)
{
int r;
- cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
+ cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_METADATA_CACHE_SIZE,
CACHE_MAX_CONCURRENT_LOCKS);
if (IS_ERR(cmd->bm)) {
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd70a78623a..7383c90ccdb 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@
#include "dm-cache-block-types.h"
#include "dm-cache-policy-internal.h"
+#include "persistent-data/dm-space-map-metadata.h"
/*----------------------------------------------------------------*/
-#define DM_CACHE_METADATA_BLOCK_SIZE 4096
+#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
/* FIXME: remove this restriction */
/*
* The metadata device is currently limited in size.
- *
- * We have one block of index, which can hold 255 index entries. Each
- * index entry contains allocation info about 16k metadata blocks.
*/
-#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
+#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
/*
* A metadata device larger than 16GB triggers a warning.
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326638b..1af40ee209e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}
+/*
+ * You must increment the deferred set whilst the prison cell is held. To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+ struct dm_bio_prison_cell *cell)
+{
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+ BUG_ON(!cell);
+ BUG_ON(pb->all_io_entry);
+
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
static void issue(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+ inc_ds(cache, bio, cell);
+ issue(cache, bio);
+}
+
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+ /*
+ * No need to inc_ds() here, since the cell will be held for the
+ * duration of the io.
+ */
generic_make_request(bio);
}
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
return;
INIT_LIST_HEAD(&work);
- if (pb->all_io_entry)
- dm_deferred_entry_dec(pb->all_io_entry, &work);
+ dm_deferred_entry_dec(pb->all_io_entry, &work);
if (!list_empty(&work))
queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
else
remap_to_cache(cache, bio, 0);
+ /*
+ * REQ_FLUSH is not directed at any particular block so we don't
+ * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH
+ * by dm-core.
+ */
issue(cache, bio);
}
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
- struct per_bio_data *pb,
- dm_oblock_t oblock, dm_cblock_t cblock)
-{
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
- remap_to_cache_dirty(cache, bio, oblock, cblock);
- issue(cache, bio);
-}
-
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
- size_t pb_data_size = get_per_bio_data_size(cache);
- struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
bool passthrough = passthrough_mode(&cache->features);
bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
} else {
/* FIXME: factor out issue_origin() */
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
}
} else {
inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
if (bio_data_dir(bio) == WRITE &&
writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock)) {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- issue(cache, bio);
- } else
- issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+
+ } else {
+ remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+ }
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
break;
case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
bio_list_init(&cache->deferred_flush_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)
if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false);
+ process_migrations(cache, &cache->need_commit_migrations, migration_failure);
/*
* FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ out:
return r;
}
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
{
- struct cache *cache = ti->private;
-
int r;
dm_oblock_t block = get_bio_block(cache, bio);
size_t pb_data_size = get_per_bio_data_size(cache);
bool can_migrate = false;
bool discarded_block;
- struct dm_bio_prison_cell *cell;
struct policy_result lookup_result;
struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
/*
* Check to see if that block is currently migrating.
*/
- cell = alloc_prison_cell(cache);
- if (!cell) {
+ *cell = alloc_prison_cell(cache);
+ if (!*cell) {
defer_bio(cache, bio);
return DM_MAPIO_SUBMITTED;
}
- r = bio_detain(cache, block, bio, cell,
+ r = bio_detain(cache, block, bio, *cell,
(cell_free_fn) free_prison_cell,
- cache, &cell);
+ cache, cell);
if (r) {
if (r < 0)
defer_bio(cache, bio);
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
bio, &lookup_result);
if (r == -EWOULDBLOCK) {
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
return DM_MAPIO_SUBMITTED;
} else if (r) {
DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
* We need to invalidate this block, so
* defer for the worker thread.
*/
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
r = DM_MAPIO_SUBMITTED;
} else {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
inc_miss_counter(cache, bio);
remap_to_origin_clear_discard(cache, bio, block);
-
- cell_defer(cache, cell, false);
}
} else {
inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock))
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
else
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
- cell_defer(cache, cell, false);
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (pb->req_nr != 0) {
/*
* This is a duplicate writethrough io that is no
* longer needed because the block has been demoted.
*/
bio_endio(bio, 0);
- cell_defer(cache, cell, false);
- return DM_MAPIO_SUBMITTED;
- } else {
+ cell_defer(cache, *cell, false);
+ r = DM_MAPIO_SUBMITTED;
+
+ } else
remap_to_origin_clear_discard(cache, bio, block);
- cell_defer(cache, cell, false);
- }
+
break;
default:
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
}
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+ int r;
+ struct dm_bio_prison_cell *cell;
+ struct cache *cache = ti->private;
+
+ r = __cache_map(cache, bio, &cell);
+ if (r == DM_MAPIO_REMAPPED) {
+ inc_ds(cache, bio, cell);
+ cell_defer(cache, cell, false);
+ }
+
+ return r;
+}
+
static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
residency = policy_residency(cache->policy);
DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
- (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
+ (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
(unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
(unsigned long long)nr_blocks_metadata,
cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < cache->sectors_per_block ||
do_div(io_opt_sectors, cache->sectors_per_block)) {
- blk_limits_io_min(limits, 0);
+ blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
}
set_discard_limits(cache, limits);
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4cba2d808af..2785007e0e4 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,7 +59,7 @@ struct dm_crypt_io {
int error;
sector_t sector;
struct dm_crypt_io *base_io;
-};
+} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
struct convert_context *ctx;
@@ -162,6 +162,8 @@ struct crypt_config {
*/
unsigned int dmreq_start;
+ unsigned int per_bio_data_size;
+
unsigned long flags;
unsigned int key_size;
unsigned int key_parts; /* independent parts in key buffer */
@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc,
kcryptd_async_done, dmreq_of_req(cc, ctx->req));
}
+static void crypt_free_req(struct crypt_config *cc,
+ struct ablkcipher_request *req, struct bio *base_bio)
+{
+ struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+
+ if ((struct ablkcipher_request *)(io + 1) != req)
+ mempool_free(req, cc->req_pool);
+}
+
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
}
}
-static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
- struct bio *bio, sector_t sector)
+static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
+ struct bio *bio, sector_t sector)
{
- struct dm_crypt_io *io;
-
- io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->cc = cc;
io->base_bio = bio;
io->sector = sector;
@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
io->base_io = NULL;
io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
-
- return io;
}
static void crypt_inc_pending(struct dm_crypt_io *io)
@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
return;
if (io->ctx.req)
- mempool_free(io->ctx.req, cc->req_pool);
- mempool_free(io, cc->io_pool);
+ crypt_free_req(cc, io->ctx.req, base_bio);
+ if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
+ mempool_free(io, cc->io_pool);
if (likely(!base_io))
bio_endio(base_bio, error);
@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* between fragments, so switch to a new dm_crypt_io structure.
*/
if (unlikely(!crypt_finished && remaining)) {
- new_io = crypt_io_alloc(io->cc, io->base_bio,
- sector);
+ new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
+ crypt_io_init(new_io, io->cc, io->base_bio, sector);
crypt_inc_pending(new_io);
crypt_convert_init(cc, &new_io->ctx, NULL,
io->base_bio, sector);
@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error < 0)
io->error = -EIO;
- mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+ crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending))
return;
@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ cc->per_bio_data_size = ti->per_bio_data_size =
+ sizeof(struct dm_crypt_io) + cc->dmreq_start +
+ sizeof(struct dm_crypt_request) + cc->iv_size;
+
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
- io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io = dm_per_bio_data(bio, cc->per_bio_data_size);
+ crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io->ctx.req = (struct ablkcipher_request *)(io + 1);
if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT))
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index db404a0f7e2..c09359db3a9 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -33,7 +33,6 @@ struct dm_io_client {
struct io {
unsigned long error_bits;
atomic_t count;
- struct completion *wait;
struct dm_io_client *client;
io_notify_fn callback;
void *context;
@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
* We need an io object to keep track of the number of bios that
* have been dispatched for a particular io.
*---------------------------------------------------------------*/
-static void dec_count(struct io *io, unsigned int region, int error)
+static void complete_io(struct io *io)
{
- if (error)
- set_bit(region, &io->error_bits);
+ unsigned long error_bits = io->error_bits;
+ io_notify_fn fn = io->callback;
+ void *context = io->context;
- if (atomic_dec_and_test(&io->count)) {
- if (io->vma_invalidate_size)
- invalidate_kernel_vmap_range(io->vma_invalidate_address,
- io->vma_invalidate_size);
+ if (io->vma_invalidate_size)
+ invalidate_kernel_vmap_range(io->vma_invalidate_address,
+ io->vma_invalidate_size);
- if (io->wait)
- complete(io->wait);
+ mempool_free(io, io->client->pool);
+ fn(error_bits, context);
+}
- else {
- unsigned long r = io->error_bits;
- io_notify_fn fn = io->callback;
- void *context = io->context;
+static void dec_count(struct io *io, unsigned int region, int error)
+{
+ if (error)
+ set_bit(region, &io->error_bits);
- mempool_free(io, io->client->pool);
- fn(r, context);
- }
- }
+ if (atomic_dec_and_test(&io->count))
+ complete_io(io);
}
static void endio(struct bio *bio, int error)
@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions,
dec_count(io, 0, 0);
}
+struct sync_io {
+ unsigned long error_bits;
+ struct completion wait;
+};
+
+static void sync_io_complete(unsigned long error, void *context)
+{
+ struct sync_io *sio = context;
+
+ sio->error_bits = error;
+ complete(&sio->wait);
+}
+
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
- /*
- * gcc <= 4.3 can't do the alignment for stack variables, so we must
- * align it on our own.
- * volatile prevents the optimizer from removing or reusing
- * "io_" field from the stack frame (allowed in ANSI C).
- */
- volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
- struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
- DECLARE_COMPLETION_ONSTACK(wait);
+ struct io *io;
+ struct sync_io sio;
if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
return -EIO;
}
+ init_completion(&sio.wait);
+
+ io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
- io->wait = &wait;
io->client = client;
+ io->callback = sync_io_complete;
+ io->context = &sio;
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
dispatch_io(rw, num_regions, where, dp, io, 1);
- wait_for_completion_io(&wait);
+ wait_for_completion_io(&sio.wait);
if (error_bits)
- *error_bits = io->error_bits;
+ *error_bits = sio.error_bits;
- return io->error_bits ? -EIO : 0;
+ return sio.error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
- io->wait = NULL;
io->client = client;
io->callback = fn;
io->context = context;
@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
* New collapsed (a)synchronous interface.
*
* If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
- * the queue with blk_unplug() some time later or set REQ_SYNC in
-io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
- * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
+ * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
+ * If you fail to do one of these, the IO will be submitted to the disk after
+ * q->unplug_delay, which defaults to 3ms in blk-settings.c.
*/
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
struct dm_io_region *where, unsigned long *sync_error_bits)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f4167b013d9..833d7e752f0 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m)
dm_noflush_suspending(m->ti)));
}
-#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required)
-
/*
* Map cloned requests
*/
@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
if (!__must_push_back(m))
r = -EIO; /* Failed */
goto out_unlock;
- }
- if (!pg_ready(m)) {
+ } else if (m->queue_io || m->pg_init_required) {
__pg_init_all_paths(m);
goto out_unlock;
}
+
if (set_mapinfo(m, map_context) < 0)
/* ENOMEM, requeue */
goto out_unlock;
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 09a688b3d48..50fca469caf 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr
*bit *= sctx->region_table_entry_bits;
}
+static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
+{
+ unsigned long region_index;
+ unsigned bit;
+
+ switch_get_position(sctx, region_nr, &region_index, &bit);
+
+ return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
+ ((1 << sctx->region_table_entry_bits) - 1);
+}
+
/*
* Find which path to use at given offset.
*/
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{
- unsigned long region_index;
- unsigned bit, path_nr;
+ unsigned path_nr;
sector_t p;
p = offset;
@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
else
sector_div(p, sctx->region_size);
- switch_get_position(sctx, p, &region_index, &bit);
- path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
- ((1 << sctx->region_table_entry_bits) - 1);
+ path_nr = switch_region_table_read(sctx, p);
/* This can only happen if the processor uses non-atomic stores. */
if (unlikely(path_nr >= sctx->nr_paths))
@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string)
}
static int process_set_region_mappings(struct switch_ctx *sctx,
- unsigned argc, char **argv)
+ unsigned argc, char **argv)
{
unsigned i;
unsigned long region_index = 0;
@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
unsigned long path_nr;
const char *string = argv[i];
+ if ((*string & 0xdf) == 'R') {
+ unsigned long cycle_length, num_write;
+
+ string++;
+ if (unlikely(*string == ',')) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ cycle_length = parse_hex(&string);
+ if (unlikely(*string != ',')) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ string++;
+ if (unlikely(!*string)) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ num_write = parse_hex(&string);
+ if (unlikely(*string)) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+
+ if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
+ DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
+ cycle_length - 1, region_index);
+ return -EINVAL;
+ }
+ if (unlikely(region_index + num_write < region_index) ||
+ unlikely(region_index + num_write >= sctx->nr_regions)) {
+ DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
+ region_index, num_write, sctx->nr_regions);
+ return -EINVAL;
+ }
+
+ while (num_write--) {
+ region_index++;
+ path_nr = switch_region_table_read(sctx, region_index - cycle_length);
+ switch_region_table_write(sctx, region_index, path_nr);
+ }
+
+ continue;
+ }
+
if (*string == ':')
region_index++;
else {
@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti,
static struct target_type switch_target = {
.name = "switch",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = switch_ctr,
.dtr = switch_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f59f1e3e5b..f9c6cb8dbcf 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
return q && !blk_queue_add_random(q);
}
+static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
+}
+
static bool dm_table_all_devices_attribute(struct dm_table *t,
iterate_devices_callout_fn func)
{
@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t)
return true;
}
+static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && blk_queue_discard(q);
+}
+
+static bool dm_table_supports_discards(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i = 0;
+
+ /*
+ * Unless any target used by the table set discards_supported,
+ * require at least one underlying device to support discards.
+ * t->devices includes internal dm devices such as mirror logs
+ * so we need to use iterate_devices here, which targets
+ * supporting discard selectively must provide.
+ */
+ while (i < dm_table_get_num_targets(t)) {
+ ti = dm_table_get_target(t, i++);
+
+ if (!ti->num_discard_bios)
+ continue;
+
+ if (ti->discards_supported)
+ return 1;
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_discard_capable, NULL))
+ return 1;
+ }
+
+ return 0;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
+ if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
+ queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ else
+ queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+
dm_table_set_integrity(t);
/*
@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);
-static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return q && blk_queue_discard(q);
-}
-
-bool dm_table_supports_discards(struct dm_table *t)
-{
- struct dm_target *ti;
- unsigned i = 0;
-
- /*
- * Unless any target used by the table set discards_supported,
- * require at least one underlying device to support discards.
- * t->devices includes internal dm devices such as mirror logs
- * so we need to use iterate_devices here, which targets
- * supporting discard selectively must provide.
- */
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
-
- if (!ti->num_discard_bios)
- continue;
-
- if (ti->discards_supported)
- return 1;
-
- if (ti->type->iterate_devices &&
- ti->type->iterate_devices(ti, device_discard_capable, NULL))
- return 1;
- }
-
- return 0;
-}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c..4843801173f 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c {
struct list_head list;
struct dm_dev *pool_dev;
struct dm_dev *origin_dev;
+ sector_t origin_size;
dm_thin_id dev_id;
struct pool *pool;
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
struct dm_thin_new_mapping {
struct list_head list;
- bool quiesced:1;
- bool prepared:1;
bool pass_discard:1;
bool definitely_not_shared:1;
+ /*
+ * Track quiescing, copying and zeroing preparation actions. When this
+ * counter hits zero the block is prepared and can be inserted into the
+ * btree.
+ */
+ atomic_t prepare_actions;
+
int err;
struct thin_c *tc;
dm_block_t virt_block;
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
bio_end_io_t *saved_bi_end_io;
};
-static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
+static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
struct pool *pool = m->tc->pool;
- if (m->quiesced && m->prepared) {
+ if (atomic_dec_and_test(&m->prepare_actions)) {
list_add_tail(&m->list, &pool->prepared_mappings);
wake_worker(pool);
}
}
-static void copy_complete(int read_err, unsigned long write_err, void *context)
+static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
unsigned long flags;
- struct dm_thin_new_mapping *m = context;
struct pool *pool = m->tc->pool;
- m->err = read_err || write_err ? -EIO : 0;
-
spin_lock_irqsave(&pool->lock, flags);
- m->prepared = true;
- __maybe_add_mapping(m);
+ __complete_mapping_preparation(m);
spin_unlock_irqrestore(&pool->lock, flags);
}
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+ struct dm_thin_new_mapping *m = context;
+
+ m->err = read_err || write_err ? -EIO : 0;
+ complete_mapping_preparation(m);
+}
+
static void overwrite_endio(struct bio *bio, int err)
{
- unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct dm_thin_new_mapping *m = h->overwrite_mapping;
- struct pool *pool = m->tc->pool;
m->err = err;
-
- spin_lock_irqsave(&pool->lock, flags);
- m->prepared = true;
- __maybe_add_mapping(m);
- spin_unlock_irqrestore(&pool->lock, flags);
+ complete_mapping_preparation(m);
}
/*----------------------------------------------------------------*/
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
return m;
}
+static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
+ sector_t begin, sector_t end)
+{
+ int r;
+ struct dm_io_region to;
+
+ to.bdev = tc->pool_dev->bdev;
+ to.sector = begin;
+ to.count = end - begin;
+
+ r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
+ if (r < 0) {
+ DMERR_LIMIT("dm_kcopyd_zero() failed");
+ copy_complete(1, 1, m);
+ }
+}
+
+/*
+ * A partial copy also needs to zero the uncopied region.
+ */
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_dev *origin, dm_block_t data_origin,
dm_block_t data_dest,
- struct dm_bio_prison_cell *cell, struct bio *bio)
+ struct dm_bio_prison_cell *cell, struct bio *bio,
+ sector_t len)
{
int r;
struct pool *pool = tc->pool;
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
m->data_block = data_dest;
m->cell = cell;
+ /*
+ * quiesce action + copy action + an extra reference held for the
+ * duration of this function (we may need to inc later for a
+ * partial zero).
+ */
+ atomic_set(&m->prepare_actions, 3);
+
if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
- m->quiesced = true;
+ complete_mapping_preparation(m); /* already quiesced */
/*
* IO to pool_dev remaps to the pool target's data_dev.
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
from.bdev = origin->bdev;
from.sector = data_origin * pool->sectors_per_block;
- from.count = pool->sectors_per_block;
+ from.count = len;
to.bdev = tc->pool_dev->bdev;
to.sector = data_dest * pool->sectors_per_block;
- to.count = pool->sectors_per_block;
+ to.count = len;
r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
0, copy_complete, m);
if (r < 0) {
- mempool_free(m, pool->mapping_pool);
DMERR_LIMIT("dm_kcopyd_copy() failed");
- cell_error(pool, cell);
+ copy_complete(1, 1, m);
+
+ /*
+ * We allow the zero to be issued, to simplify the
+ * error path. Otherwise we'd need to start
+ * worrying about decrementing the prepare_actions
+ * counter.
+ */
+ }
+
+ /*
+ * Do we need to zero a tail region?
+ */
+ if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
+ atomic_inc(&m->prepare_actions);
+ ll_zero(tc, m,
+ data_dest * pool->sectors_per_block + len,
+ (data_dest + 1) * pool->sectors_per_block);
}
}
+
+ complete_mapping_preparation(m); /* drop our ref */
}
static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_bio_prison_cell *cell, struct bio *bio)
{
schedule_copy(tc, virt_block, tc->pool_dev,
- data_origin, data_dest, cell, bio);
-}
-
-static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
- dm_block_t data_dest,
- struct dm_bio_prison_cell *cell, struct bio *bio)
-{
- schedule_copy(tc, virt_block, tc->origin_dev,
- virt_block, data_dest, cell, bio);
+ data_origin, data_dest, cell, bio,
+ tc->pool->sectors_per_block);
}
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
struct pool *pool = tc->pool;
struct dm_thin_new_mapping *m = get_next_mapping(pool);
- m->quiesced = true;
- m->prepared = false;
+ atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
m->tc = tc;
m->virt_block = virt_block;
m->data_block = data_block;
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
inc_all_io_entry(pool, bio);
remap_and_issue(tc, bio, data_block);
- } else {
- int r;
- struct dm_io_region to;
- to.bdev = tc->pool_dev->bdev;
- to.sector = data_block * pool->sectors_per_block;
- to.count = pool->sectors_per_block;
+ } else
+ ll_zero(tc, m,
+ data_block * pool->sectors_per_block,
+ (data_block + 1) * pool->sectors_per_block);
+}
- r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
- if (r < 0) {
- mempool_free(m, pool->mapping_pool);
- DMERR_LIMIT("dm_kcopyd_zero() failed");
- cell_error(pool, cell);
- }
- }
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+ dm_block_t data_dest,
+ struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ sector_t virt_block_begin = virt_block * pool->sectors_per_block;
+ sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
+
+ if (virt_block_end <= tc->origin_size)
+ schedule_copy(tc, virt_block, tc->origin_dev,
+ virt_block, data_dest, cell, bio,
+ pool->sectors_per_block);
+
+ else if (virt_block_begin < tc->origin_size)
+ schedule_copy(tc, virt_block, tc->origin_dev,
+ virt_block, data_dest, cell, bio,
+ tc->origin_size - virt_block_begin);
+
+ else
+ schedule_zero(tc, virt_block, data_dest, cell, bio);
}
/*
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
- remap_to_origin_and_issue(tc, bio);
+ if (bio_end_sector(bio) <= tc->origin_size)
+ remap_to_origin_and_issue(tc, bio);
+
+ else if (bio->bi_iter.bi_sector < tc->origin_size) {
+ zero_fill_bio(bio);
+ bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
+ remap_to_origin_and_issue(tc, bio);
+
+ } else {
+ zero_fill_bio(bio);
+ bio_endio(bio, 0);
+ }
} else
provision_block(tc, bio, block, cell);
break;
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < pool->sectors_per_block ||
do_div(io_opt_sectors, pool->sectors_per_block)) {
- blk_limits_io_min(limits, 0);
+ blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
}
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &work, list) {
list_del(&m->list);
- m->quiesced = true;
- __maybe_add_mapping(m);
+ __complete_mapping_preparation(m);
}
spin_unlock_irqrestore(&pool->lock, flags);
}
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
noflush_work(tc, do_noflush_stop);
}
+static int thin_preresume(struct dm_target *ti)
+{
+ struct thin_c *tc = ti->private;
+
+ if (tc->origin_dev)
+ tc->origin_size = get_dev_size(tc->origin_dev->bdev);
+
+ return 0;
+}
+
/*
* <nr mapped sectors> <highest mapped sector>
*/
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
.map = thin_map,
.end_io = thin_endio,
+ .preresume = thin_preresume,
.presuspend = thin_presuspend,
.postsuspend = thin_postsuspend,
.status = thin_status,
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index ed76126aac5..e81d2152fa6 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t);
unsigned dm_table_get_type(struct dm_table *t);
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
-bool dm_table_supports_discards(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index 6352bec8419..71f387ce8cb 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -744,6 +744,7 @@ static struct usb_device_id rtsx_usb_usb_ids[] = {
{ USB_DEVICE(0x0BDA, 0x0140) },
{ }
};
+MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids);
static struct usb_driver rtsx_usb_driver = {
.name = "rtsx_usb",
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 452782bffeb..ede41f05c39 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2028,8 +2028,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
/* complete ongoing async transfer before issuing discard */
if (card->host->areq)
mmc_blk_issue_rw_rq(mq, NULL);
- if (req->cmd_flags & REQ_SECURE &&
- !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
+ if (req->cmd_flags & REQ_SECURE)
ret = mmc_blk_issue_secdiscard_rq(mq, req);
else
ret = mmc_blk_issue_discard_rq(mq, req);
@@ -2432,6 +2431,8 @@ static int mmc_blk_probe(struct mmc_card *card)
if (!(card->csd.cmdclass & CCC_BLOCK_READ))
return -ENODEV;
+ mmc_fixup_device(card, blk_fixups);
+
md = mmc_blk_alloc(card);
if (IS_ERR(md))
return PTR_ERR(md);
@@ -2446,7 +2447,6 @@ static int mmc_blk_probe(struct mmc_card *card)
goto out;
mmc_set_drvdata(card, md);
- mmc_fixup_device(card, blk_fixups);
if (mmc_add_disk(md))
goto out;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index d2dbf02022b..8a1f1240e05 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -180,7 +180,6 @@ static int mmc_bus_resume(struct device *dev)
#endif
#ifdef CONFIG_PM_RUNTIME
-
static int mmc_runtime_suspend(struct device *dev)
{
struct mmc_card *card = mmc_dev_to_card(dev);
@@ -196,17 +195,10 @@ static int mmc_runtime_resume(struct device *dev)
return host->bus_ops->runtime_resume(host);
}
-
-static int mmc_runtime_idle(struct device *dev)
-{
- return 0;
-}
-
#endif /* !CONFIG_PM_RUNTIME */
static const struct dev_pm_ops mmc_bus_pm_ops = {
- SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume,
- mmc_runtime_idle)
+ SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, NULL)
SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume)
};
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 7dc0c85fdb6..d03a080fb9c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2102,7 +2102,8 @@ EXPORT_SYMBOL(mmc_can_sanitize);
int mmc_can_secure_erase_trim(struct mmc_card *card)
{
- if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)
+ if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
+ !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
return 1;
return 0;
}
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 793c6f7ddb0..1eda8dd8c86 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -324,13 +324,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
}
}
+ /*
+ * The EXT_CSD format is meant to be forward compatible. As long
+ * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
+ * are authorized, see JEDEC JESD84-B50 section B.8.
+ */
card->ext_csd.rev = ext_csd[EXT_CSD_REV];
- if (card->ext_csd.rev > 7) {
- pr_err("%s: unrecognised EXT_CSD revision %d\n",
- mmc_hostname(card->host), card->ext_csd.rev);
- err = -EINVAL;
- goto out;
- }
card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index 6c36fccaa1e..dd1d1e0fe32 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -91,7 +91,7 @@ void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table)
(f->cis_device == card->cis.device ||
f->cis_device == (u16) SDIO_ANY_ID) &&
rev >= f->rev_start && rev <= f->rev_end) {
- dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup);
+ dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup);
f->vendor_fixup(card, f->data);
}
}
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 274ef00b446..48d0c93ba25 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -184,6 +184,9 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
mmc_delay(10);
}
+ if (!i)
+ pr_err("%s: card never left busy state\n", mmc_hostname(host));
+
if (rocr && !mmc_host_is_spi(host))
*rocr = cmd.resp[0];
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a5652548230..45113582246 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -290,6 +290,18 @@ config MMC_MOXART
be found on some embedded hardware such as UC-7112-LX.
If you have a controller with this interface, say Y here.
+config MMC_SDHCI_ST
+ tristate "SDHCI support on STMicroelectronics SoC"
+ depends on ARCH_STI
+ depends on MMC_SDHCI_PLTFM
+ select MMC_SDHCI_IO_ACCESSORS
+ help
+ This selects the Secure Digital Host Controller Interface in
+ STMicroelectronics SoCs.
+
+ If you have a controller with this interface, say Y or M here.
+ If unsure, say N.
+
config MMC_OMAP
tristate "TI OMAP Multimedia Card Interface support"
depends on ARCH_OMAP
@@ -303,6 +315,7 @@ config MMC_OMAP
config MMC_OMAP_HS
tristate "TI OMAP High Speed Multimedia Card Interface support"
+ depends on HAS_DMA
depends on ARCH_OMAP2PLUS || COMPILE_TEST
help
This selects the TI OMAP High Speed Multimedia card Interface.
@@ -343,7 +356,7 @@ config MMC_ATMELMCI
config MMC_SDHCI_MSM
tristate "Qualcomm SDHCI Controller Support"
- depends on ARCH_QCOM
+ depends on ARCH_QCOM || (ARM && COMPILE_TEST)
depends on MMC_SDHCI_PLTFM
help
This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -440,6 +453,7 @@ config MMC_SPI
config MMC_S3C
tristate "Samsung S3C SD/MMC Card Interface support"
depends on ARCH_S3C24XX
+ depends on S3C24XX_DMAC
help
This selects a driver for the MCI interface found in
Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
@@ -477,15 +491,6 @@ config MMC_S3C_DMA
working properly and needs to be debugged before this
option is useful.
-config MMC_S3C_PIODMA
- bool "Support for both PIO and DMA"
- help
- Compile both the PIO and DMA transfer routines into the
- driver and let the platform select at run-time which one
- is best.
-
- See notes for the DMA option.
-
endchoice
config MMC_SDRICOH_CS
@@ -623,7 +628,7 @@ config MMC_DW_PCI
config MMC_SH_MMCIF
tristate "SuperH Internal MMCIF support"
- depends on MMC_BLOCK
+ depends on MMC_BLOCK && HAS_DMA
depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
help
This selects the MMC Host Interface controller (MMCIF).
@@ -697,6 +702,7 @@ config MMC_WMT
config MMC_USDHI6ROL0
tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
+ depends on HAS_DMA
help
This selects support for the Renesas USDHI6ROL0 SD/SDIO
Host Controller
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7f81ddf1dd2..f211eede8db 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o
obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o
obj-$(CONFIG_MMC_SDHCI_BCM2835) += sdhci-bcm2835.o
obj-$(CONFIG_MMC_SDHCI_MSM) += sdhci-msm.o
+obj-$(CONFIG_MMC_SDHCI_ST) += sdhci-st.o
ifeq ($(CONFIG_CB710_DEBUG),y)
CFLAGS-cb710-mmc += -DDEBUG
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 1ac227c603b..8f216edbdf0 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -111,8 +111,7 @@ static const u8 tuning_blk_pattern_8bit[] = {
0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
};
-static inline bool dw_mci_fifo_reset(struct dw_mci *host);
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host);
+static bool dw_mci_reset(struct dw_mci *host);
#if defined(CONFIG_DEBUG_FS)
static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -997,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
int gpio_ro = mmc_gpio_get_ro(mmc);
/* Use platform get_ro function, else try on board write protect */
- if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
+ if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) ||
+ (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT))
read_only = 0;
else if (!IS_ERR_VALUE(gpio_ro))
read_only = gpio_ro;
@@ -1235,7 +1235,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
* After an error, there may be data lingering
* in the FIFO
*/
- dw_mci_fifo_reset(host);
+ dw_mci_reset(host);
} else {
data->bytes_xfered = data->blocks * data->blksz;
data->error = 0;
@@ -1352,7 +1352,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
/* CMD error in data command */
if (mrq->cmd->error && mrq->data)
- dw_mci_fifo_reset(host);
+ dw_mci_reset(host);
host->cmd = NULL;
host->data = NULL;
@@ -1963,14 +1963,8 @@ static void dw_mci_work_routine_card(struct work_struct *work)
}
/* Power down slot */
- if (present == 0) {
- /* Clear down the FIFO */
- dw_mci_fifo_reset(host);
-#ifdef CONFIG_MMC_DW_IDMAC
- dw_mci_idmac_reset(host);
-#endif
-
- }
+ if (present == 0)
+ dw_mci_reset(host);
spin_unlock_bh(&host->lock);
@@ -2021,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
/* get quirks */
for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
- if (of_get_property(np, of_slot_quirks[idx].quirk, NULL))
+ if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) {
+ dev_warn(dev, "Slot quirk %s is deprecated\n",
+ of_slot_quirks[idx].quirk);
quirks |= of_slot_quirks[idx].id;
+ }
return quirks;
}
@@ -2208,8 +2205,11 @@ static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
return false;
}
-static inline bool dw_mci_fifo_reset(struct dw_mci *host)
+static bool dw_mci_reset(struct dw_mci *host)
{
+ u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
+ bool ret = false;
+
/*
* Reseting generates a block interrupt, hence setting
* the scatter-gather pointer to NULL.
@@ -2219,15 +2219,60 @@ static inline bool dw_mci_fifo_reset(struct dw_mci *host)
host->sg = NULL;
}
- return dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET);
-}
+ if (host->use_dma)
+ flags |= SDMMC_CTRL_DMA_RESET;
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host)
-{
- return dw_mci_ctrl_reset(host,
- SDMMC_CTRL_FIFO_RESET |
- SDMMC_CTRL_RESET |
- SDMMC_CTRL_DMA_RESET);
+ if (dw_mci_ctrl_reset(host, flags)) {
+ /*
+ * In all cases we clear the RAWINTS register to clear any
+ * interrupts.
+ */
+ mci_writel(host, RINTSTS, 0xFFFFFFFF);
+
+ /* if using dma we wait for dma_req to clear */
+ if (host->use_dma) {
+ unsigned long timeout = jiffies + msecs_to_jiffies(500);
+ u32 status;
+ do {
+ status = mci_readl(host, STATUS);
+ if (!(status & SDMMC_STATUS_DMA_REQ))
+ break;
+ cpu_relax();
+ } while (time_before(jiffies, timeout));
+
+ if (status & SDMMC_STATUS_DMA_REQ) {
+ dev_err(host->dev,
+ "%s: Timeout waiting for dma_req to "
+ "clear during reset\n", __func__);
+ goto ciu_out;
+ }
+
+ /* when using DMA next we reset the fifo again */
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
+ goto ciu_out;
+ }
+ } else {
+ /* if the controller reset bit did clear, then set clock regs */
+ if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
+ dev_err(host->dev, "%s: fifo/dma reset bits didn't "
+ "clear but ciu was reset, doing clock update\n",
+ __func__);
+ goto ciu_out;
+ }
+ }
+
+#if IS_ENABLED(CONFIG_MMC_DW_IDMAC)
+ /* It is also recommended that we reset and reprogram idmac */
+ dw_mci_idmac_reset(host);
+#endif
+
+ ret = true;
+
+ciu_out:
+ /* After a CTRL reset we need to have CIU set clock registers */
+ mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
+
+ return ret;
}
#ifdef CONFIG_OF
@@ -2238,6 +2283,9 @@ static struct dw_mci_of_quirks {
{
.quirk = "broken-cd",
.id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
+ }, {
+ .quirk = "disable-wp",
+ .id = DW_MCI_QUIRK_NO_WRITE_PROTECT,
},
};
@@ -2425,7 +2473,7 @@ int dw_mci_probe(struct dw_mci *host)
}
/* Reset all blocks */
- if (!dw_mci_ctrl_all_reset(host))
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS))
return -ENODEV;
host->dma_ops = host->pdata->dma_ops;
@@ -2612,7 +2660,7 @@ int dw_mci_resume(struct dw_mci *host)
}
}
- if (!dw_mci_ctrl_all_reset(host)) {
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
ret = -ENODEV;
return ret;
}
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 738fa241d05..08fd956d81f 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -129,6 +129,7 @@
#define SDMMC_CMD_INDX(n) ((n) & 0x1F)
/* Status register defines */
#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF)
+#define SDMMC_STATUS_DMA_REQ BIT(31)
/* FIFOTH register defines */
#define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \
((r) & 0xFFF) << 16 | \
@@ -150,6 +151,10 @@
/* Card read threshold */
#define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x))
+/* All ctrl reset bits */
+#define SDMMC_CTRL_ALL_RESET_FLAGS \
+ (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
+
/* Register access macros */
#define mci_readl(dev, reg) \
__raw_readl((dev)->regs + SDMMC_##reg)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7ad463e9741..e4d47070415 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -52,34 +52,53 @@ static unsigned int fmax = 515633;
* struct variant_data - MMCI variant-specific quirks
* @clkreg: default value for MCICLOCK register
* @clkreg_enable: enable value for MMCICLOCK register
+ * @clkreg_8bit_bus_enable: enable value for 8 bit bus
+ * @clkreg_neg_edge_enable: enable value for inverted data/cmd output
* @datalength_bits: number of bits in the MMCIDATALENGTH register
* @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
* is asserted (likewise for RX)
* @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
* is asserted (likewise for RX)
+ * @data_cmd_enable: enable value for data commands.
* @sdio: variant supports SDIO
* @st_clkdiv: true if using a ST-specific clock divider algorithm
+ * @datactrl_mask_ddrmode: ddr mode mask in datactrl register.
* @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register
+ * @blksz_datactrl4: true if Block size is at b4..b16 position in datactrl
+ * register
* @pwrreg_powerup: power up value for MMCIPOWER register
+ * @f_max: maximum clk frequency supported by the controller.
* @signal_direction: input/out direction of bus signals can be indicated
* @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock
* @busy_detect: true if busy detection on dat0 is supported
* @pwrreg_nopower: bits in MMCIPOWER don't controls ext. power supply
+ * @explicit_mclk_control: enable explicit mclk control in driver.
+ * @qcom_fifo: enables qcom specific fifo pio read logic.
+ * @reversed_irq_handling: handle data irq before cmd irq.
*/
struct variant_data {
unsigned int clkreg;
unsigned int clkreg_enable;
+ unsigned int clkreg_8bit_bus_enable;
+ unsigned int clkreg_neg_edge_enable;
unsigned int datalength_bits;
unsigned int fifosize;
unsigned int fifohalfsize;
+ unsigned int data_cmd_enable;
+ unsigned int datactrl_mask_ddrmode;
bool sdio;
bool st_clkdiv;
bool blksz_datactrl16;
+ bool blksz_datactrl4;
u32 pwrreg_powerup;
+ u32 f_max;
bool signal_direction;
bool pwrreg_clkgate;
bool busy_detect;
bool pwrreg_nopower;
+ bool explicit_mclk_control;
+ bool qcom_fifo;
+ bool reversed_irq_handling;
};
static struct variant_data variant_arm = {
@@ -87,6 +106,8 @@ static struct variant_data variant_arm = {
.fifohalfsize = 8 * 4,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
+ .reversed_irq_handling = true,
};
static struct variant_data variant_arm_extended_fifo = {
@@ -94,6 +115,7 @@ static struct variant_data variant_arm_extended_fifo = {
.fifohalfsize = 64 * 4,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
};
static struct variant_data variant_arm_extended_fifo_hwfc = {
@@ -102,15 +124,18 @@ static struct variant_data variant_arm_extended_fifo_hwfc = {
.clkreg_enable = MCI_ARM_HWFCEN,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
};
static struct variant_data variant_u300 = {
.fifosize = 16 * 4,
.fifohalfsize = 8 * 4,
.clkreg_enable = MCI_ST_U300_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
.datalength_bits = 16,
.sdio = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.pwrreg_nopower = true,
@@ -124,6 +149,7 @@ static struct variant_data variant_nomadik = {
.sdio = true,
.st_clkdiv = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.pwrreg_nopower = true,
@@ -134,10 +160,13 @@ static struct variant_data variant_ux500 = {
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
.clkreg_enable = MCI_ST_UX500_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+ .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
.datalength_bits = 24,
.sdio = true,
.st_clkdiv = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.busy_detect = true,
@@ -149,17 +178,38 @@ static struct variant_data variant_ux500v2 = {
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
.clkreg_enable = MCI_ST_UX500_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+ .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
+ .datactrl_mask_ddrmode = MCI_ST_DPSM_DDRMODE,
.datalength_bits = 24,
.sdio = true,
.st_clkdiv = true,
.blksz_datactrl16 = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.busy_detect = true,
.pwrreg_nopower = true,
};
+static struct variant_data variant_qcom = {
+ .fifosize = 16 * 4,
+ .fifohalfsize = 8 * 4,
+ .clkreg = MCI_CLK_ENABLE,
+ .clkreg_enable = MCI_QCOM_CLK_FLOWENA |
+ MCI_QCOM_CLK_SELECT_IN_FBCLK,
+ .clkreg_8bit_bus_enable = MCI_QCOM_CLK_WIDEBUS_8,
+ .datactrl_mask_ddrmode = MCI_QCOM_CLK_SELECT_IN_DDR_MODE,
+ .data_cmd_enable = MCI_QCOM_CSPM_DATCMD,
+ .blksz_datactrl4 = true,
+ .datalength_bits = 24,
+ .pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 208000000,
+ .explicit_mclk_control = true,
+ .qcom_fifo = true,
+};
+
static int mmci_card_busy(struct mmc_host *mmc)
{
struct mmci_host *host = mmc_priv(mmc);
@@ -260,7 +310,9 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
host->cclk = 0;
if (desired) {
- if (desired >= host->mclk) {
+ if (variant->explicit_mclk_control) {
+ host->cclk = host->mclk;
+ } else if (desired >= host->mclk) {
clk = MCI_CLK_BYPASS;
if (variant->st_clkdiv)
clk |= MCI_ST_UX500_NEG_EDGE;
@@ -299,11 +351,11 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
clk |= MCI_4BIT_BUS;
if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
- clk |= MCI_ST_8BIT_BUS;
+ clk |= variant->clkreg_8bit_bus_enable;
if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
- clk |= MCI_ST_UX500_NEG_EDGE;
+ clk |= variant->clkreg_neg_edge_enable;
mmci_write_clkreg(host, clk);
}
@@ -719,7 +771,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
data->bytes_xfered = 0;
clks = (unsigned long long)data->timeout_ns * host->cclk;
- do_div(clks, 1000000000UL);
+ do_div(clks, NSEC_PER_SEC);
timeout = data->timeout_clks + (unsigned int)clks;
@@ -732,6 +784,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
if (variant->blksz_datactrl16)
datactrl = MCI_DPSM_ENABLE | (data->blksz << 16);
+ else if (variant->blksz_datactrl4)
+ datactrl = MCI_DPSM_ENABLE | (data->blksz << 4);
else
datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
@@ -767,7 +821,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
- datactrl |= MCI_ST_DPSM_DDRMODE;
+ datactrl |= variant->datactrl_mask_ddrmode;
/*
* Attempt to use DMA operation mode, if this
@@ -812,7 +866,7 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) {
writel(0, base + MMCICOMMAND);
- udelay(1);
+ mmci_reg_delay(host);
}
c |= cmd->opcode | MCI_CPSM_ENABLE;
@@ -824,6 +878,9 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
if (/*interrupt*/0)
c |= MCI_CPSM_INTERRUPT;
+ if (mmc_cmd_type(cmd) == MMC_CMD_ADTC)
+ c |= host->variant->data_cmd_enable;
+
host->cmd = cmd;
writel(cmd->arg, base + MMCIARGUMENT);
@@ -834,6 +891,10 @@ static void
mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
unsigned int status)
{
+ /* Make sure we have data to handle */
+ if (!data)
+ return;
+
/* First check for errors */
if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
@@ -902,9 +963,17 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
unsigned int status)
{
void __iomem *base = host->base;
- bool sbc = (cmd == host->mrq->sbc);
- bool busy_resp = host->variant->busy_detect &&
- (cmd->flags & MMC_RSP_BUSY);
+ bool sbc, busy_resp;
+
+ if (!cmd)
+ return;
+
+ sbc = (cmd == host->mrq->sbc);
+ busy_resp = host->variant->busy_detect && (cmd->flags & MMC_RSP_BUSY);
+
+ if (!((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
+ MCI_CMDSENT|MCI_CMDRESPEND)))
+ return;
/* Check if we need to wait for busy completion. */
if (host->busy_status && (status & MCI_ST_CARDBUSY))
@@ -957,15 +1026,34 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
}
}
+static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain)
+{
+ return remain - (readl(host->base + MMCIFIFOCNT) << 2);
+}
+
+static int mmci_qcom_get_rx_fifocnt(struct mmci_host *host, u32 status, int r)
+{
+ /*
+ * on qcom SDCC4 only 8 words are used in each burst so only 8 addresses
+ * from the fifo range should be used
+ */
+ if (status & MCI_RXFIFOHALFFULL)
+ return host->variant->fifohalfsize;
+ else if (status & MCI_RXDATAAVLBL)
+ return 4;
+
+ return 0;
+}
+
static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain)
{
void __iomem *base = host->base;
char *ptr = buffer;
- u32 status;
+ u32 status = readl(host->base + MMCISTATUS);
int host_remain = host->size;
do {
- int count = host_remain - (readl(base + MMCIFIFOCNT) << 2);
+ int count = host->get_rx_fifocnt(host, status, host_remain);
if (count > remain)
count = remain;
@@ -1132,9 +1220,6 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
spin_lock(&host->lock);
do {
- struct mmc_command *cmd;
- struct mmc_data *data;
-
status = readl(host->base + MMCISTATUS);
if (host->singleirq) {
@@ -1154,16 +1239,13 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
- cmd = host->cmd;
- if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
- MCI_CMDSENT|MCI_CMDRESPEND) && cmd)
- mmci_cmd_irq(host, cmd, status);
-
- data = host->data;
- if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
- MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
- MCI_DATABLOCKEND) && data)
- mmci_data_irq(host, data, status);
+ if (host->variant->reversed_irq_handling) {
+ mmci_data_irq(host, host->data, status);
+ mmci_cmd_irq(host, host->cmd, status);
+ } else {
+ mmci_cmd_irq(host, host->cmd, status);
+ mmci_data_irq(host, host->data, status);
+ }
/* Don't poll for busy completion in irq context. */
if (host->busy_status)
@@ -1296,6 +1378,17 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (!ios->clock && variant->pwrreg_clkgate)
pwr &= ~MCI_PWR_ON;
+ if (host->variant->explicit_mclk_control &&
+ ios->clock != host->clock_cache) {
+ ret = clk_set_rate(host->clk, ios->clock);
+ if (ret < 0)
+ dev_err(mmc_dev(host->mmc),
+ "Error setting clock rate (%d)\n", ret);
+ else
+ host->mclk = clk_get_rate(host->clk);
+ }
+ host->clock_cache = ios->clock;
+
spin_lock_irqsave(&host->lock, flags);
mmci_set_clkreg(host, ios->clock);
@@ -1443,6 +1536,11 @@ static int mmci_probe(struct amba_device *dev,
if (ret)
goto host_free;
+ if (variant->qcom_fifo)
+ host->get_rx_fifocnt = mmci_qcom_get_rx_fifocnt;
+ else
+ host->get_rx_fifocnt = mmci_get_rx_fifocnt;
+
host->plat = plat;
host->variant = variant;
host->mclk = clk_get_rate(host->clk);
@@ -1451,8 +1549,8 @@ static int mmci_probe(struct amba_device *dev,
* so we try to adjust the clock down to this,
* (if possible).
*/
- if (host->mclk > 100000000) {
- ret = clk_set_rate(host->clk, 100000000);
+ if (host->mclk > variant->f_max) {
+ ret = clk_set_rate(host->clk, variant->f_max);
if (ret < 0)
goto clk_disable;
host->mclk = clk_get_rate(host->clk);
@@ -1471,9 +1569,12 @@ static int mmci_probe(struct amba_device *dev,
* The ARM and ST versions of the block have slightly different
* clock divider equations which means that the minimum divider
* differs too.
+ * on Qualcomm like controllers get the nearest minimum clock to 100Khz
*/
if (variant->st_clkdiv)
mmc->f_min = DIV_ROUND_UP(host->mclk, 257);
+ else if (variant->explicit_mclk_control)
+ mmc->f_min = clk_round_rate(host->clk, 100000);
else
mmc->f_min = DIV_ROUND_UP(host->mclk, 512);
/*
@@ -1483,9 +1584,14 @@ static int mmci_probe(struct amba_device *dev,
* the block, of course.
*/
if (mmc->f_max)
- mmc->f_max = min(host->mclk, mmc->f_max);
+ mmc->f_max = variant->explicit_mclk_control ?
+ min(variant->f_max, mmc->f_max) :
+ min(host->mclk, mmc->f_max);
else
- mmc->f_max = min(host->mclk, fmax);
+ mmc->f_max = variant->explicit_mclk_control ?
+ fmax : min(host->mclk, fmax);
+
+
dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
/* Get regulators and the supported OCR mask */
@@ -1752,6 +1858,12 @@ static struct amba_id mmci_ids[] = {
.mask = 0xf0ffffff,
.data = &variant_ux500v2,
},
+ /* Qualcomm variants */
+ {
+ .id = 0x00051180,
+ .mask = 0x000fffff,
+ .data = &variant_qcom,
+ },
{ 0, 0 },
};
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 347d942d740..a1f5e4f49e2 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -41,6 +41,15 @@
/* Modified PL180 on Versatile Express platform */
#define MCI_ARM_HWFCEN (1 << 12)
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CLK_WIDEBUS_8 (BIT(10) | BIT(11))
+#define MCI_QCOM_CLK_FLOWENA BIT(12)
+#define MCI_QCOM_CLK_INVERTOUT BIT(13)
+
+/* select in latch data and command in */
+#define MCI_QCOM_CLK_SELECT_IN_FBCLK BIT(15)
+#define MCI_QCOM_CLK_SELECT_IN_DDR_MODE (BIT(14) | BIT(15))
+
#define MMCIARGUMENT 0x008
#define MMCICOMMAND 0x00c
#define MCI_CPSM_RESPONSE (1 << 6)
@@ -54,6 +63,14 @@
#define MCI_ST_NIEN (1 << 13)
#define MCI_ST_CE_ATACMD (1 << 14)
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CSPM_DATCMD BIT(12)
+#define MCI_QCOM_CSPM_MCIABORT BIT(13)
+#define MCI_QCOM_CSPM_CCSENABLE BIT(14)
+#define MCI_QCOM_CSPM_CCSDISABLE BIT(15)
+#define MCI_QCOM_CSPM_AUTO_CMD19 BIT(16)
+#define MCI_QCOM_CSPM_AUTO_CMD21 BIT(21)
+
#define MMCIRESPCMD 0x010
#define MMCIRESPONSE0 0x014
#define MMCIRESPONSE1 0x018
@@ -191,6 +208,8 @@ struct mmci_host {
spinlock_t lock;
unsigned int mclk;
+ /* cached value of requested clk in set_ios */
+ unsigned int clock_cache;
unsigned int cclk;
u32 pwr_reg;
u32 pwr_reg_add;
@@ -210,6 +229,7 @@ struct mmci_host {
/* pio stuff */
struct sg_mapping_iter sg_miter;
unsigned int size;
+ int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain);
#ifdef CONFIG_DMA_ENGINE
/* DMA stuff */
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 74924a04026..b4b1efbf6c1 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -13,7 +13,6 @@
* warranty of any kind, whether express or implied.
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index babfea03ba8..140885a5a4e 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -86,7 +86,8 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc)
if (ret >= 0)
return ret;
- present = !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
+ present = mmc->caps & MMC_CAP_NEEDS_POLL ||
+ !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
BM_SSP_STATUS_CARD_DETECT);
if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b7b7558592..965672663ef 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -29,6 +29,7 @@
#include <linux/timer.h>
#include <linux/clk.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/of_gpio.h>
#include <linux/of_device.h>
#include <linux/omap-dmaengine.h>
@@ -36,6 +37,7 @@
#include <linux/mmc/core.h>
#include <linux/mmc/mmc.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/gpio.h>
#include <linux/regulator/consumer.h>
#include <linux/pinctrl/consumer.h>
@@ -54,6 +56,7 @@
#define OMAP_HSMMC_RSP54 0x0118
#define OMAP_HSMMC_RSP76 0x011C
#define OMAP_HSMMC_DATA 0x0120
+#define OMAP_HSMMC_PSTATE 0x0124
#define OMAP_HSMMC_HCTL 0x0128
#define OMAP_HSMMC_SYSCTL 0x012C
#define OMAP_HSMMC_STAT 0x0130
@@ -91,7 +94,10 @@
#define BCE (1 << 1)
#define FOUR_BIT (1 << 1)
#define HSPE (1 << 2)
+#define IWE (1 << 24)
#define DDR (1 << 19)
+#define CLKEXTFREE (1 << 16)
+#define CTPL (1 << 11)
#define DW8 (1 << 5)
#define OD 0x1
#define STAT_CLEAR 0xFFFFFFFF
@@ -101,11 +107,15 @@
#define SRD (1 << 26)
#define SOFTRESET (1 << 1)
+/* PSTATE */
+#define DLEV_DAT(x) (1 << (20 + (x)))
+
/* Interrupt masks for IE and ISE register */
#define CC_EN (1 << 0)
#define TC_EN (1 << 1)
#define BWR_EN (1 << 4)
#define BRR_EN (1 << 5)
+#define CIRQ_EN (1 << 8)
#define ERR_EN (1 << 15)
#define CTO_EN (1 << 16)
#define CCRC_EN (1 << 17)
@@ -140,7 +150,6 @@
#define VDD_3V0 3000000 /* 300000 uV */
#define VDD_165_195 (ffs(MMC_VDD_165_195) - 1)
-#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */
/*
* One controller can have multiple slots, like on some omap boards using
* omap.c controller driver. Luckily this is not currently done on any known
@@ -194,6 +203,7 @@ struct omap_hsmmc_host {
u32 sysctl;
u32 capa;
int irq;
+ int wake_irq;
int use_dma, dma_ch;
struct dma_chan *tx_chan;
struct dma_chan *rx_chan;
@@ -206,6 +216,9 @@ struct omap_hsmmc_host {
int req_in_progress;
unsigned long clk_rate;
unsigned int flags;
+#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */
+#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */
+#define HSMMC_WAKE_IRQ_ENABLED (1 << 2)
struct omap_hsmmc_next next_data;
struct omap_mmc_platform_data *pdata;
};
@@ -510,27 +523,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host,
struct mmc_command *cmd)
{
- unsigned int irq_mask;
+ u32 irq_mask = INT_EN_MASK;
+ unsigned long flags;
if (host->use_dma)
- irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN);
- else
- irq_mask = INT_EN_MASK;
+ irq_mask &= ~(BRR_EN | BWR_EN);
/* Disable timeout for erases */
if (cmd->opcode == MMC_ERASE)
irq_mask &= ~DTO_EN;
+ spin_lock_irqsave(&host->irq_lock, flags);
OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+ /* latch pending CIRQ, but don't signal MMC core */
+ if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+ irq_mask |= CIRQ_EN;
OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+ spin_unlock_irqrestore(&host->irq_lock, flags);
}
static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
{
- OMAP_HSMMC_WRITE(host->base, ISE, 0);
- OMAP_HSMMC_WRITE(host->base, IE, 0);
+ u32 irq_mask = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+ /* no transfer running but need to keep cirq if enabled */
+ if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+ irq_mask |= CIRQ_EN;
+ OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+ OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ spin_unlock_irqrestore(&host->irq_lock, flags);
}
/* Calculate divisor for the given clock frequency */
@@ -667,6 +693,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
capa = VS18;
}
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ hctl |= IWE;
+
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL) | hctl);
@@ -681,7 +710,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
&& time_before(jiffies, timeout))
;
- omap_hsmmc_disable_irq(host);
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
/* Do not initialize card-specific things if the power is off */
if (host->power_mode == MMC_POWER_OFF)
@@ -1118,8 +1149,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
int status;
status = OMAP_HSMMC_READ(host->base, STAT);
- while (status & INT_EN_MASK && host->req_in_progress) {
- omap_hsmmc_do_irq(host, status);
+ while (status & (INT_EN_MASK | CIRQ_EN)) {
+ if (host->req_in_progress)
+ omap_hsmmc_do_irq(host, status);
+
+ if (status & CIRQ_EN)
+ mmc_signal_sdio_irq(host->mmc);
/* Flush posted write */
status = OMAP_HSMMC_READ(host->base, STAT);
@@ -1128,6 +1163,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
+{
+ struct omap_hsmmc_host *host = dev_id;
+
+ /* cirq is level triggered, disable to avoid infinite loop */
+ spin_lock(&host->irq_lock);
+ if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+ disable_irq_nosync(host->wake_irq);
+ host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+ }
+ spin_unlock(&host->irq_lock);
+ pm_request_resume(host->dev); /* no use counter */
+
+ return IRQ_HANDLED;
+}
+
static void set_sd_bus_power(struct omap_hsmmc_host *host)
{
unsigned long i;
@@ -1639,6 +1690,103 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
mmc_slot(host).init_card(card);
}
+static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct omap_hsmmc_host *host = mmc_priv(mmc);
+ u32 irq_mask, con;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+
+ con = OMAP_HSMMC_READ(host->base, CON);
+ irq_mask = OMAP_HSMMC_READ(host->base, ISE);
+ if (enable) {
+ host->flags |= HSMMC_SDIO_IRQ_ENABLED;
+ irq_mask |= CIRQ_EN;
+ con |= CTPL | CLKEXTFREE;
+ } else {
+ host->flags &= ~HSMMC_SDIO_IRQ_ENABLED;
+ irq_mask &= ~CIRQ_EN;
+ con &= ~(CTPL | CLKEXTFREE);
+ }
+ OMAP_HSMMC_WRITE(host->base, CON, con);
+ OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+
+ /*
+ * if enable, piggy back detection on current request
+ * but always disable immediately
+ */
+ if (!host->req_in_progress || !enable)
+ OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+ /* flush posted write */
+ OMAP_HSMMC_READ(host->base, IE);
+
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+}
+
+static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+ int ret;
+
+ /*
+ * For omaps with wake-up path, wakeirq will be irq from pinctrl and
+ * for other omaps, wakeirq will be from GPIO (dat line remuxed to
+ * gpio). wakeirq is needed to detect sdio irq in runtime suspend state
+ * with functional clock disabled.
+ */
+ if (!host->dev->of_node || !host->wake_irq)
+ return -ENODEV;
+
+ /* Prevent auto-enabling of IRQ */
+ irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
+ ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ mmc_hostname(mmc), host);
+ if (ret) {
+ dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
+ goto err;
+ }
+
+ /*
+ * Some omaps don't have wake-up path from deeper idle states
+ * and need to remux SDIO DAT1 to GPIO for wake-up from idle.
+ */
+ if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
+ struct pinctrl *p = devm_pinctrl_get(host->dev);
+ if (!p) {
+ ret = -ENODEV;
+ goto err_free_irq;
+ }
+ if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) {
+ dev_info(host->dev, "missing default pinctrl state\n");
+ devm_pinctrl_put(p);
+ ret = -EINVAL;
+ goto err_free_irq;
+ }
+
+ if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_IDLE))) {
+ dev_info(host->dev, "missing idle pinctrl state\n");
+ devm_pinctrl_put(p);
+ ret = -EINVAL;
+ goto err_free_irq;
+ }
+ devm_pinctrl_put(p);
+ }
+
+ OMAP_HSMMC_WRITE(host->base, HCTL,
+ OMAP_HSMMC_READ(host->base, HCTL) | IWE);
+ return 0;
+
+err_free_irq:
+ devm_free_irq(host->dev, host->wake_irq, host);
+err:
+ dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
+ host->wake_irq = 0;
+ return ret;
+}
+
static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
{
u32 hctl, capa, value;
@@ -1691,7 +1839,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = {
.get_cd = omap_hsmmc_get_cd,
.get_ro = omap_hsmmc_get_ro,
.init_card = omap_hsmmc_init_card,
- /* NYET -- enable_sdio_irq */
+ .enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
};
#ifdef CONFIG_DEBUG_FS
@@ -1701,13 +1849,23 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
struct mmc_host *mmc = s->private;
struct omap_hsmmc_host *host = mmc_priv(mmc);
- seq_printf(s, "mmc%d:\n ctx_loss:\t%d\n\nregs:\n",
- mmc->index, host->context_loss);
+ seq_printf(s, "mmc%d:\n", mmc->index);
+ seq_printf(s, "sdio irq mode\t%s\n",
+ (mmc->caps & MMC_CAP_SDIO_IRQ) ? "interrupt" : "polling");
- pm_runtime_get_sync(host->dev);
+ if (mmc->caps & MMC_CAP_SDIO_IRQ) {
+ seq_printf(s, "sdio irq \t%s\n",
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED) ? "enabled"
+ : "disabled");
+ }
+ seq_printf(s, "ctx_loss:\t%d\n", host->context_loss);
+ pm_runtime_get_sync(host->dev);
+ seq_puts(s, "\nregs:\n");
seq_printf(s, "CON:\t\t0x%08x\n",
OMAP_HSMMC_READ(host->base, CON));
+ seq_printf(s, "PSTATE:\t\t0x%08x\n",
+ OMAP_HSMMC_READ(host->base, PSTATE));
seq_printf(s, "HCTL:\t\t0x%08x\n",
OMAP_HSMMC_READ(host->base, HCTL));
seq_printf(s, "SYSCTL:\t\t0x%08x\n",
@@ -1761,6 +1919,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = {
static const struct omap_mmc_of_data omap4_mmc_of_data = {
.reg_offset = 0x100,
};
+static const struct omap_mmc_of_data am33xx_mmc_of_data = {
+ .reg_offset = 0x100,
+ .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING,
+};
static const struct of_device_id omap_mmc_of_match[] = {
{
@@ -1777,6 +1939,10 @@ static const struct of_device_id omap_mmc_of_match[] = {
.compatible = "ti,omap4-hsmmc",
.data = &omap4_mmc_of_data,
},
+ {
+ .compatible = "ti,am33xx-hsmmc",
+ .data = &am33xx_mmc_of_data,
+ },
{},
};
MODULE_DEVICE_TABLE(of, omap_mmc_of_match);
@@ -1850,7 +2016,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
const struct of_device_id *match;
dma_cap_mask_t mask;
unsigned tx_req, rx_req;
- struct pinctrl *pinctrl;
const struct omap_mmc_of_data *data;
void __iomem *base;
@@ -1913,6 +2078,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, host);
+ if (pdev->dev.of_node)
+ host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1);
+
mmc->ops = &omap_hsmmc_ops;
mmc->f_min = OMAP_MMC_MIN_CLOCK;
@@ -2061,10 +2229,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
omap_hsmmc_disable_irq(host);
- pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
- if (IS_ERR(pinctrl))
- dev_warn(&pdev->dev,
- "pins are not configured from the driver\n");
+ /*
+ * For now, only support SDIO interrupt if we have a separate
+ * wake-up interrupt configured from device tree. This is because
+ * the wake-up interrupt is needed for idle state and some
+ * platforms need special quirks. And we don't want to add new
+ * legacy mux platform init code callbacks any longer as we
+ * are moving to DT based booting anyways.
+ */
+ ret = omap_hsmmc_configure_wake_irq(host);
+ if (!ret)
+ mmc->caps |= MMC_CAP_SDIO_IRQ;
omap_hsmmc_protect_card(host);
@@ -2170,11 +2345,18 @@ static int omap_hsmmc_suspend(struct device *dev)
pm_runtime_get_sync(host->dev);
if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) {
- omap_hsmmc_disable_irq(host);
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
}
+ /* do not wake up due to sdio irq */
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+ disable_irq(host->wake_irq);
+
if (host->dbclk)
clk_disable_unprepare(host->dbclk);
@@ -2200,6 +2382,10 @@ static int omap_hsmmc_resume(struct device *dev)
omap_hsmmc_protect_card(host);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+ enable_irq(host->wake_irq);
+
pm_runtime_mark_last_busy(host->dev);
pm_runtime_put_autosuspend(host->dev);
return 0;
@@ -2215,22 +2401,77 @@ static int omap_hsmmc_resume(struct device *dev)
static int omap_hsmmc_runtime_suspend(struct device *dev)
{
struct omap_hsmmc_host *host;
+ unsigned long flags;
+ int ret = 0;
host = platform_get_drvdata(to_platform_device(dev));
omap_hsmmc_context_save(host);
dev_dbg(dev, "disabled\n");
- return 0;
+ spin_lock_irqsave(&host->irq_lock, flags);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+ /* disable sdio irq handling to prevent race */
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+
+ if (!(OMAP_HSMMC_READ(host->base, PSTATE) & DLEV_DAT(1))) {
+ /*
+ * dat1 line low, pending sdio irq
+ * race condition: possible irq handler running on
+ * multi-core, abort
+ */
+ dev_dbg(dev, "pending sdio irq, abort suspend\n");
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+ OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+ pm_runtime_mark_last_busy(dev);
+ ret = -EBUSY;
+ goto abort;
+ }
+
+ pinctrl_pm_select_idle_state(dev);
+
+ WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
+ enable_irq(host->wake_irq);
+ host->flags |= HSMMC_WAKE_IRQ_ENABLED;
+ } else {
+ pinctrl_pm_select_idle_state(dev);
+ }
+
+abort:
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+ return ret;
}
static int omap_hsmmc_runtime_resume(struct device *dev)
{
struct omap_hsmmc_host *host;
+ unsigned long flags;
host = platform_get_drvdata(to_platform_device(dev));
omap_hsmmc_context_restore(host);
dev_dbg(dev, "enabled\n");
+ spin_lock_irqsave(&host->irq_lock, flags);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+ /* sdio irq flag can't change while in runtime suspend */
+ if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+ disable_irq_nosync(host->wake_irq);
+ host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+ }
+
+ pinctrl_pm_select_default_state(host->dev);
+
+ /* irq lost, if pinmux incorrect */
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+ OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+ } else {
+ pinctrl_pm_select_default_state(host->dev);
+ }
+ spin_unlock_irqrestore(&host->irq_lock, flags);
return 0;
}
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f23782683a7..e5516a22636 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -12,6 +12,7 @@
*/
#include <linux/module.h>
+#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/clk.h>
#include <linux/mmc/host.h>
@@ -27,6 +28,7 @@
#include <mach/dma.h>
#include <mach/gpio-samsung.h>
+#include <linux/platform_data/dma-s3c24xx.h>
#include <linux/platform_data/mmc-s3cmci.h>
#include "s3cmci.h"
@@ -140,10 +142,6 @@ static const int dbgmap_debug = dbg_err | dbg_debug;
dev_dbg(&host->pdev->dev, args); \
} while (0)
-static struct s3c2410_dma_client s3cmci_dma_client = {
- .name = "s3c-mci",
-};
-
static void finalize_request(struct s3cmci_host *host);
static void s3cmci_send_request(struct mmc_host *mmc);
static void s3cmci_reset(struct s3cmci_host *host);
@@ -256,25 +254,8 @@ static inline bool s3cmci_host_usedma(struct s3cmci_host *host)
{
#ifdef CONFIG_MMC_S3C_PIO
return false;
-#elif defined(CONFIG_MMC_S3C_DMA)
+#else /* CONFIG_MMC_S3C_DMA */
return true;
-#else
- return host->dodma;
-#endif
-}
-
-/**
- * s3cmci_host_canpio - return true if host has pio code available
- *
- * Return true if the driver has been compiled with the PIO support code
- * available.
- */
-static inline bool s3cmci_host_canpio(void)
-{
-#ifdef CONFIG_MMC_S3C_PIO
- return true;
-#else
- return false;
#endif
}
@@ -841,60 +822,24 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch,
- void *buf_id, int size,
- enum s3c2410_dma_buffresult result)
+static void s3cmci_dma_done_callback(void *arg)
{
- struct s3cmci_host *host = buf_id;
+ struct s3cmci_host *host = arg;
unsigned long iflags;
- u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt;
-
- mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
- mci_dsta = readl(host->base + S3C2410_SDIDSTA);
- mci_fsta = readl(host->base + S3C2410_SDIFSTA);
- mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
BUG_ON(!host->mrq);
BUG_ON(!host->mrq->data);
- BUG_ON(!host->dmatogo);
spin_lock_irqsave(&host->complete_lock, iflags);
- if (result != S3C2410_RES_OK) {
- dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x "
- "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n",
- mci_csta, mci_dsta, mci_fsta,
- mci_dcnt, result, host->dmatogo);
-
- goto fail_request;
- }
-
- host->dmatogo--;
- if (host->dmatogo) {
- dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] "
- "DCNT:[%08x] toGo:%u\n",
- size, mci_dsta, mci_dcnt, host->dmatogo);
-
- goto out;
- }
-
- dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n",
- size, mci_dsta, mci_dcnt);
+ dbg(host, dbg_dma, "DMA FINISHED\n");
host->dma_complete = 1;
host->complete_what = COMPLETION_FINALIZE;
-out:
tasklet_schedule(&host->pio_tasklet);
spin_unlock_irqrestore(&host->complete_lock, iflags);
- return;
-fail_request:
- host->mrq->data->error = -EINVAL;
- host->complete_what = COMPLETION_FINALIZE;
- clear_imask(host);
-
- goto out;
}
static void finalize_request(struct s3cmci_host *host)
@@ -966,7 +911,7 @@ static void finalize_request(struct s3cmci_host *host)
* DMA channel and the fifo to clear out any garbage. */
if (mrq->data->error != 0) {
if (s3cmci_host_usedma(host))
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+ dmaengine_terminate_all(host->dma);
if (host->is2440) {
/* Clear failure register and reset fifo. */
@@ -992,29 +937,6 @@ request_done:
mmc_request_done(host->mmc, mrq);
}
-static void s3cmci_dma_setup(struct s3cmci_host *host,
- enum dma_data_direction source)
-{
- static enum dma_data_direction last_source = -1;
- static int setup_ok;
-
- if (last_source == source)
- return;
-
- last_source = source;
-
- s3c2410_dma_devconfig(host->dma, source,
- host->mem->start + host->sdidata);
-
- if (!setup_ok) {
- s3c2410_dma_config(host->dma, 4);
- s3c2410_dma_set_buffdone_fn(host->dma,
- s3cmci_dma_done_callback);
- s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
- setup_ok = 1;
- }
-}
-
static void s3cmci_send_command(struct s3cmci_host *host,
struct mmc_command *cmd)
{
@@ -1162,43 +1084,45 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
{
- int dma_len, i;
int rw = data->flags & MMC_DATA_WRITE;
+ struct dma_async_tx_descriptor *desc;
+ struct dma_slave_config conf = {
+ .src_addr = host->mem->start + host->sdidata,
+ .dst_addr = host->mem->start + host->sdidata,
+ .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ };
BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
- s3cmci_dma_setup(host, rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
-
- dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
- rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- if (dma_len == 0)
- return -ENOMEM;
-
- host->dma_complete = 0;
- host->dmatogo = dma_len;
-
- for (i = 0; i < dma_len; i++) {
- int res;
-
- dbg(host, dbg_dma, "enqueue %i: %08x@%u\n", i,
- sg_dma_address(&data->sg[i]),
- sg_dma_len(&data->sg[i]));
+ /* Restore prescaler value */
+ writel(host->prescaler, host->base + S3C2410_SDIPRE);
- res = s3c2410_dma_enqueue(host->dma, host,
- sg_dma_address(&data->sg[i]),
- sg_dma_len(&data->sg[i]));
+ if (!rw)
+ conf.direction = DMA_DEV_TO_MEM;
+ else
+ conf.direction = DMA_MEM_TO_DEV;
- if (res) {
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
- return -EBUSY;
- }
- }
+ dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START);
+ dmaengine_slave_config(host->dma, &conf);
+ desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len,
+ conf.direction,
+ DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+ if (!desc)
+ goto unmap_exit;
+ desc->callback = s3cmci_dma_done_callback;
+ desc->callback_param = host;
+ dmaengine_submit(desc);
+ dma_async_issue_pending(host->dma);
return 0;
+
+unmap_exit:
+ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ return -ENOMEM;
}
static void s3cmci_send_request(struct mmc_host *mmc)
@@ -1676,10 +1600,6 @@ static int s3cmci_probe(struct platform_device *pdev)
host->complete_what = COMPLETION_NONE;
host->pio_active = XFER_NONE;
-#ifdef CONFIG_MMC_S3C_PIODMA
- host->dodma = host->pdata->use_dma;
-#endif
-
host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!host->mem) {
dev_err(&pdev->dev,
@@ -1765,17 +1685,17 @@ static int s3cmci_probe(struct platform_device *pdev)
/* depending on the dma state, get a dma channel to use. */
if (s3cmci_host_usedma(host)) {
- host->dma = s3c2410_dma_request(DMACH_SDI, &s3cmci_dma_client,
- host);
- if (host->dma < 0) {
+ dma_cap_mask_t mask;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ host->dma = dma_request_slave_channel_compat(mask,
+ s3c24xx_dma_filter, (void *)DMACH_SDI, &pdev->dev, "rx-tx");
+ if (!host->dma) {
dev_err(&pdev->dev, "cannot get DMA channel.\n");
- if (!s3cmci_host_canpio()) {
- ret = -EBUSY;
- goto probe_free_gpio_wp;
- } else {
- dev_warn(&pdev->dev, "falling back to PIO.\n");
- host->dodma = 0;
- }
+ ret = -EBUSY;
+ goto probe_free_gpio_wp;
}
}
@@ -1787,7 +1707,7 @@ static int s3cmci_probe(struct platform_device *pdev)
goto probe_free_dma;
}
- ret = clk_enable(host->clk);
+ ret = clk_prepare_enable(host->clk);
if (ret) {
dev_err(&pdev->dev, "failed to enable clock source.\n");
goto clk_free;
@@ -1816,7 +1736,7 @@ static int s3cmci_probe(struct platform_device *pdev)
mmc->max_segs = 128;
dbg(host, dbg_debug,
- "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
+ "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%p.\n",
(host->is2440?"2440":""),
host->base, host->irq, host->irq_cd, host->dma);
@@ -1845,14 +1765,14 @@ static int s3cmci_probe(struct platform_device *pdev)
s3cmci_cpufreq_deregister(host);
free_dmabuf:
- clk_disable(host->clk);
+ clk_disable_unprepare(host->clk);
clk_free:
clk_put(host->clk);
probe_free_dma:
if (s3cmci_host_usedma(host))
- s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+ dma_release_channel(host->dma);
probe_free_gpio_wp:
if (!host->pdata->no_wprotect)
@@ -1897,7 +1817,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
s3cmci_debugfs_remove(host);
s3cmci_cpufreq_deregister(host);
mmc_remove_host(mmc);
- clk_disable(host->clk);
+ clk_disable_unprepare(host->clk);
}
static int s3cmci_remove(struct platform_device *pdev)
@@ -1914,7 +1834,7 @@ static int s3cmci_remove(struct platform_device *pdev)
tasklet_disable(&host->pio_tasklet);
if (s3cmci_host_usedma(host))
- s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+ dma_release_channel(host->dma);
free_irq(host->irq, host);
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index c76b53dbeb6..cc2e46cb5c6 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -26,7 +26,7 @@ struct s3cmci_host {
void __iomem *base;
int irq;
int irq_cd;
- int dma;
+ struct dma_chan *dma;
unsigned long clk_rate;
unsigned long clk_div;
@@ -36,8 +36,6 @@ struct s3cmci_host {
int is2440;
unsigned sdiimsk;
unsigned sdidata;
- int dodma;
- int dmatogo;
bool irq_disabled;
bool irq_enabled;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 8ce3c28cb76..8c5337002c5 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -124,9 +124,11 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
.chip = &sdhci_acpi_chip_int,
- .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET,
+ .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR,
.caps2 = MMC_CAP2_HC_ERASE_SZ,
.flags = SDHCI_ACPI_RUNTIME_PM,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
};
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 40573a58486..1a6661ed620 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -16,7 +16,6 @@
#include <linux/module.h>
#include <linux/of_device.h>
-#include <linux/regulator/consumer.h>
#include <linux/delay.h>
#include <linux/mmc/mmc.h>
#include <linux/slab.h>
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 52c42fcc284..c3a1debc928 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -103,6 +103,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
};
+static const struct sdhci_pci_fixes sdhci_intel_qrk = {
+ .quirks = SDHCI_QUIRK_NO_HISPD_BIT,
+};
+
static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
{
slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
@@ -264,7 +268,7 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
{
slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
- MMC_CAP_HW_RESET;
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR;
slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
slot->hw_reset = sdhci_pci_int_hw_reset;
return 0;
@@ -279,6 +283,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
.allow_runtime_pm = true,
.probe_slot = byt_emmc_probe_slot,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
};
static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -753,6 +758,14 @@ static const struct pci_device_id pci_ids[] = {
{
.vendor = PCI_VENDOR_ID_INTEL,
+ .device = PCI_DEVICE_ID_INTEL_QRK_SD,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_intel_qrk,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_INTEL,
.device = PCI_DEVICE_ID_INTEL_MRST_SD0,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
@@ -1130,18 +1143,13 @@ static int sdhci_pci_suspend(struct device *dev)
goto err_pci_suspend;
}
- pci_save_state(pdev);
if (pm_flags & MMC_PM_KEEP_POWER) {
- if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) {
- pci_pme_active(pdev, true);
- pci_enable_wake(pdev, PCI_D3hot, 1);
- }
- pci_set_power_state(pdev, PCI_D3hot);
- } else {
- pci_enable_wake(pdev, PCI_D3hot, 0);
- pci_disable_device(pdev);
- pci_set_power_state(pdev, PCI_D3hot);
- }
+ if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+ device_init_wakeup(dev, true);
+ else
+ device_init_wakeup(dev, false);
+ } else
+ device_init_wakeup(dev, false);
return 0;
@@ -1162,12 +1170,6 @@ static int sdhci_pci_resume(struct device *dev)
if (!chip)
return 0;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
-
if (chip->fixes && chip->fixes->resume) {
ret = chip->fixes->resume(chip);
if (ret)
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 6d718719659..c101477ef3b 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -17,6 +17,7 @@
#define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb
#define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5
#define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6
+#define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7
/*
* PCI registers
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index f4f12894756..6f842fb8e6b 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -288,15 +288,13 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
int ret;
struct clk *clk;
- pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
+ pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL);
if (!pxa)
return -ENOMEM;
host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0);
- if (IS_ERR(host)) {
- kfree(pxa);
+ if (IS_ERR(host))
return PTR_ERR(host);
- }
if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
@@ -308,7 +306,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
pltfm_host = sdhci_priv(host);
pltfm_host->priv = pxa;
- clk = clk_get(dev, NULL);
+ clk = devm_clk_get(dev, NULL);
if (IS_ERR(clk)) {
dev_err(dev, "failed to get io clock\n");
ret = PTR_ERR(clk);
@@ -389,11 +387,9 @@ err_add_host:
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(clk);
- clk_put(clk);
err_clk_get:
err_mbus_win:
sdhci_pltfm_free(pdev);
- kfree(pxa);
return ret;
}
@@ -401,17 +397,14 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct sdhci_pxa *pxa = pltfm_host->priv;
pm_runtime_get_sync(&pdev->dev);
sdhci_remove_host(host, 1);
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(pltfm_host->clk);
- clk_put(pltfm_host->clk);
sdhci_pltfm_free(pdev);
- kfree(pxa);
return 0;
}
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
new file mode 100644
index 00000000000..328f348c724
--- /dev/null
+++ b/drivers/mmc/host/sdhci-st.c
@@ -0,0 +1,176 @@
+/*
+ * Support for SDHCI on STMicroelectronics SoCs
+ *
+ * Copyright (C) 2014 STMicroelectronics Ltd
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ * Contributors: Peter Griffin <peter.griffin@linaro.org>
+ *
+ * Based on sdhci-cns3xxx.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mmc/host.h>
+
+#include "sdhci-pltfm.h"
+
+static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
+{
+ u32 ret;
+
+ switch (reg) {
+ case SDHCI_CAPABILITIES:
+ ret = readl_relaxed(host->ioaddr + reg);
+ /* Support 3.3V and 1.8V */
+ ret &= ~SDHCI_CAN_VDD_300;
+ break;
+ default:
+ ret = readl_relaxed(host->ioaddr + reg);
+ }
+ return ret;
+}
+
+static const struct sdhci_ops sdhci_st_ops = {
+ .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .read_l = sdhci_st_readl,
+ .reset = sdhci_reset,
+};
+
+static const struct sdhci_pltfm_data sdhci_st_pdata = {
+ .ops = &sdhci_st_ops,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+};
+
+
+static int sdhci_st_probe(struct platform_device *pdev)
+{
+ struct sdhci_host *host;
+ struct sdhci_pltfm_host *pltfm_host;
+ struct clk *clk;
+ int ret = 0;
+ u16 host_version;
+
+ clk = devm_clk_get(&pdev->dev, "mmc");
+ if (IS_ERR(clk)) {
+ dev_err(&pdev->dev, "Peripheral clk not found\n");
+ return PTR_ERR(clk);
+ }
+
+ host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
+ if (IS_ERR(host)) {
+ dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
+ return PTR_ERR(host);
+ }
+
+ ret = mmc_of_parse(host->mmc);
+
+ if (ret) {
+ dev_err(&pdev->dev, "Failed mmc_of_parse\n");
+ return ret;
+ }
+
+ clk_prepare_enable(clk);
+
+ pltfm_host = sdhci_priv(host);
+ pltfm_host->clk = clk;
+
+ ret = sdhci_add_host(host);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed sdhci_add_host\n");
+ goto err_out;
+ }
+
+ platform_set_drvdata(pdev, host);
+
+ host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
+
+ dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n",
+ ((host_version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT),
+ ((host_version & SDHCI_VENDOR_VER_MASK) >>
+ SDHCI_VENDOR_VER_SHIFT));
+
+ return 0;
+
+err_out:
+ clk_disable_unprepare(clk);
+ sdhci_pltfm_free(pdev);
+
+ return ret;
+}
+
+static int sdhci_st_remove(struct platform_device *pdev)
+{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ clk_disable_unprepare(pltfm_host->clk);
+
+ return sdhci_pltfm_unregister(pdev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sdhci_st_suspend(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ int ret = sdhci_suspend_host(host);
+
+ if (ret)
+ goto out;
+
+ clk_disable_unprepare(pltfm_host->clk);
+out:
+ return ret;
+}
+
+static int sdhci_st_resume(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ clk_prepare_enable(pltfm_host->clk);
+
+ return sdhci_resume_host(host);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sdhci_st_pmops, sdhci_st_suspend, sdhci_st_resume);
+
+static const struct of_device_id st_sdhci_match[] = {
+ { .compatible = "st,sdhci" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, st_sdhci_match);
+
+static struct platform_driver sdhci_st_driver = {
+ .probe = sdhci_st_probe,
+ .remove = sdhci_st_remove,
+ .driver = {
+ .name = "sdhci-st",
+ .pm = &sdhci_st_pmops,
+ .of_match_table = of_match_ptr(st_sdhci_match),
+ },
+};
+
+module_platform_driver(sdhci_st_driver);
+
+MODULE_DESCRIPTION("SDHCI driver for STMicroelectronics SoCs");
+MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:st-sdhci");
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index d93a063a36f..33100d10d17 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -26,8 +26,6 @@
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>
-#include <asm/gpio.h>
-
#include "sdhci-pltfm.h"
/* Tegra SDHOST controller vendor register definitions */
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 47055f3f01b..37b2a9ae52e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1223,8 +1223,16 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock);
static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
unsigned short vdd)
{
+ struct mmc_host *mmc = host->mmc;
u8 pwr = 0;
+ if (!IS_ERR(mmc->supply.vmmc)) {
+ spin_unlock_irq(&host->lock);
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+ spin_lock_irq(&host->lock);
+ return;
+ }
+
if (mode != MMC_POWER_OFF) {
switch (1 << vdd) {
case MMC_VDD_165_195:
@@ -1283,12 +1291,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
mdelay(10);
}
-
- if (host->vmmc) {
- spin_unlock_irq(&host->lock);
- mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
- spin_lock_irq(&host->lock);
- }
}
/*****************************************************************************\
@@ -1440,13 +1442,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
{
unsigned long flags;
u8 ctrl;
+ struct mmc_host *mmc = host->mmc;
spin_lock_irqsave(&host->lock, flags);
if (host->flags & SDHCI_DEVICE_DEAD) {
spin_unlock_irqrestore(&host->lock, flags);
- if (host->vmmc && ios->power_mode == MMC_POWER_OFF)
- mmc_regulator_set_ocr(host->mmc, host->vmmc, 0);
+ if (!IS_ERR(mmc->supply.vmmc) &&
+ ios->power_mode == MMC_POWER_OFF)
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
return;
}
@@ -1530,7 +1534,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
host->ops->set_clock(host, host->clock);
}
-
/* Reset SD Clock Enable */
clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
clk &= ~SDHCI_CLOCK_CARD_EN;
@@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
struct mmc_ios *ios)
{
+ struct mmc_host *mmc = host->mmc;
u16 ctrl;
int ret;
@@ -1725,11 +1729,12 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
ctrl &= ~SDHCI_CTRL_VDD_180;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000);
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000,
+ 3600000);
if (ret) {
pr_warning("%s: Switching to 3.3V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -1742,16 +1747,16 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
return 0;
pr_warning("%s: 3.3V regulator output did not became stable\n",
- mmc_hostname(host->mmc));
+ mmc_hostname(mmc));
return -EAGAIN;
case MMC_SIGNAL_VOLTAGE_180:
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc,
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc,
1700000, 1950000);
if (ret) {
pr_warning("%s: Switching to 1.8V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -1763,24 +1768,22 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
ctrl |= SDHCI_CTRL_VDD_180;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- /* Wait for 5ms */
- usleep_range(5000, 5500);
-
/* 1.8V regulator output should be stable within 5 ms */
ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
if (ctrl & SDHCI_CTRL_VDD_180)
return 0;
pr_warning("%s: 1.8V regulator output did not became stable\n",
- mmc_hostname(host->mmc));
+ mmc_hostname(mmc));
return -EAGAIN;
case MMC_SIGNAL_VOLTAGE_120:
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000);
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000,
+ 1300000);
if (ret) {
pr_warning("%s: Switching to 1.2V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -2643,7 +2646,6 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
int sdhci_runtime_suspend_host(struct sdhci_host *host)
{
unsigned long flags;
- int ret = 0;
/* Disable tuning since we are suspending */
if (host->flags & SDHCI_USING_RETUNING_TIMER) {
@@ -2663,14 +2665,14 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
host->runtime_suspended = true;
spin_unlock_irqrestore(&host->lock, flags);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host);
int sdhci_runtime_resume_host(struct sdhci_host *host)
{
unsigned long flags;
- int ret = 0, host_flags = host->flags;
+ int host_flags = host->flags;
if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
if (host->ops->enable_dma)
@@ -2709,7 +2711,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
spin_unlock_irqrestore(&host->lock, flags);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host);
@@ -2820,12 +2822,12 @@ int sdhci_add_host(struct sdhci_host *host)
* (128) and potentially one alignment transfer for
* each of those entries.
*/
- host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc),
+ host->adma_desc = dma_alloc_coherent(mmc_dev(mmc),
ADMA_SIZE, &host->adma_addr,
GFP_KERNEL);
host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
if (!host->adma_desc || !host->align_buffer) {
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
pr_warning("%s: Unable to allocate ADMA "
@@ -2838,7 +2840,7 @@ int sdhci_add_host(struct sdhci_host *host)
pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_ADMA;
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
host->adma_desc = NULL;
@@ -2853,7 +2855,7 @@ int sdhci_add_host(struct sdhci_host *host)
*/
if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) {
host->dma_mask = DMA_BIT_MASK(64);
- mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
+ mmc_dev(mmc)->dma_mask = &host->dma_mask;
}
if (host->version >= SDHCI_SPEC_300)
@@ -2959,28 +2961,25 @@ int sdhci_add_host(struct sdhci_host *host)
mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
- !(host->mmc->caps & MMC_CAP_NONREMOVABLE))
+ !(mmc->caps & MMC_CAP_NONREMOVABLE))
mmc->caps |= MMC_CAP_NEEDS_POLL;
+ /* If there are external regulators, get them */
+ if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
- host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc");
- if (IS_ERR_OR_NULL(host->vqmmc)) {
- if (PTR_ERR(host->vqmmc) < 0) {
- pr_info("%s: no vqmmc regulator found\n",
- mmc_hostname(mmc));
- host->vqmmc = NULL;
- }
- } else {
- ret = regulator_enable(host->vqmmc);
- if (!regulator_is_supported_voltage(host->vqmmc, 1700000,
- 1950000))
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_enable(mmc->supply.vqmmc);
+ if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000,
+ 1950000))
caps[1] &= ~(SDHCI_SUPPORT_SDR104 |
SDHCI_SUPPORT_SDR50 |
SDHCI_SUPPORT_DDR50);
if (ret) {
pr_warn("%s: Failed to enable vqmmc regulator: %d\n",
mmc_hostname(mmc), ret);
- host->vqmmc = NULL;
+ mmc->supply.vqmmc = NULL;
}
}
@@ -3041,34 +3040,6 @@ int sdhci_add_host(struct sdhci_host *host)
ocr_avail = 0;
- host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc");
- if (IS_ERR_OR_NULL(host->vmmc)) {
- if (PTR_ERR(host->vmmc) < 0) {
- pr_info("%s: no vmmc regulator found\n",
- mmc_hostname(mmc));
- host->vmmc = NULL;
- }
- }
-
-#ifdef CONFIG_REGULATOR
- /*
- * Voltage range check makes sense only if regulator reports
- * any voltage value.
- */
- if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) {
- ret = regulator_is_supported_voltage(host->vmmc, 2700000,
- 3600000);
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330)))
- caps[0] &= ~SDHCI_CAN_VDD_330;
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300)))
- caps[0] &= ~SDHCI_CAN_VDD_300;
- ret = regulator_is_supported_voltage(host->vmmc, 1700000,
- 1950000);
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180)))
- caps[0] &= ~SDHCI_CAN_VDD_180;
- }
-#endif /* CONFIG_REGULATOR */
-
/*
* According to SD Host Controller spec v3.00, if the Host System
* can afford more than 150mA, Host Driver should set XPC to 1. Also
@@ -3077,8 +3048,8 @@ int sdhci_add_host(struct sdhci_host *host)
* value.
*/
max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
- if (!max_current_caps && host->vmmc) {
- u32 curr = regulator_get_current_limit(host->vmmc);
+ if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
+ u32 curr = regulator_get_current_limit(mmc->supply.vmmc);
if (curr > 0) {
/* convert to SDHCI_MAX_CURRENT format */
@@ -3118,8 +3089,12 @@ int sdhci_add_host(struct sdhci_host *host)
SDHCI_MAX_CURRENT_MULTIPLIER;
}
+ /* If OCR set by external regulators, use it instead */
+ if (mmc->ocr_avail)
+ ocr_avail = mmc->ocr_avail;
+
if (host->ocr_mask)
- ocr_avail = host->ocr_mask;
+ ocr_avail &= host->ocr_mask;
mmc->ocr_avail = ocr_avail;
mmc->ocr_avail_sdio = ocr_avail;
@@ -3273,6 +3248,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host);
void sdhci_remove_host(struct sdhci_host *host, int dead)
{
+ struct mmc_host *mmc = host->mmc;
unsigned long flags;
if (dead) {
@@ -3282,7 +3258,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
if (host->mrq) {
pr_err("%s: Controller removed during "
- " transfer!\n", mmc_hostname(host->mmc));
+ " transfer!\n", mmc_hostname(mmc));
host->mrq->cmd->error = -ENOMEDIUM;
tasklet_schedule(&host->finish_tasklet);
@@ -3293,7 +3269,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
sdhci_disable_card_detection(host);
- mmc_remove_host(host->mmc);
+ mmc_remove_host(mmc);
#ifdef SDHCI_USE_LEDS_CLASS
led_classdev_unregister(&host->led);
@@ -3310,18 +3286,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
tasklet_kill(&host->finish_tasklet);
- if (host->vmmc) {
- regulator_disable(host->vmmc);
- regulator_put(host->vmmc);
- }
+ if (!IS_ERR(mmc->supply.vmmc))
+ regulator_disable(mmc->supply.vmmc);
- if (host->vqmmc) {
- regulator_disable(host->vqmmc);
- regulator_put(host->vqmmc);
- }
+ if (!IS_ERR(mmc->supply.vqmmc))
+ regulator_disable(mmc->supply.vqmmc);
if (host->adma_desc)
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 656fbba4c42..d11708c815d 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -386,7 +386,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
struct sh_mmcif_plat_data *pdata,
enum dma_transfer_direction direction)
{
- struct dma_slave_config cfg;
+ struct dma_slave_config cfg = { 0, };
struct dma_chan *chan;
unsigned int slave_id;
struct resource *res;
@@ -417,8 +417,15 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
/* In the OF case the driver will get the slave ID from the DT */
cfg.slave_id = slave_id;
cfg.direction = direction;
- cfg.dst_addr = res->start + MMCIF_CE_DATA;
- cfg.src_addr = 0;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ cfg.src_addr = res->start + MMCIF_CE_DATA;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ } else {
+ cfg.dst_addr = res->start + MMCIF_CE_DATA;
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ }
+
ret = dmaengine_slave_config(chan, &cfg);
if (ret < 0) {
dma_release_channel(chan);
@@ -1378,26 +1385,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Get irq error\n");
return -ENXIO;
}
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "platform_get_resource error.\n");
- return -ENXIO;
- }
- reg = ioremap(res->start, resource_size(res));
- if (!reg) {
- dev_err(&pdev->dev, "ioremap error.\n");
- return -ENOMEM;
- }
+ reg = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(reg))
+ return PTR_ERR(reg);
mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev);
- if (!mmc) {
- ret = -ENOMEM;
- goto ealloch;
- }
+ if (!mmc)
+ return -ENOMEM;
ret = mmc_of_parse(mmc);
if (ret < 0)
- goto eofparse;
+ goto err_host;
host = mmc_priv(mmc);
host->mmc = mmc;
@@ -1427,19 +1427,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
pm_runtime_enable(&pdev->dev);
host->power = false;
- host->hclk = clk_get(&pdev->dev, NULL);
+ host->hclk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(host->hclk)) {
ret = PTR_ERR(host->hclk);
dev_err(&pdev->dev, "cannot get clock: %d\n", ret);
- goto eclkget;
+ goto err_pm;
}
ret = sh_mmcif_clk_update(host);
if (ret < 0)
- goto eclkupdate;
+ goto err_pm;
ret = pm_runtime_resume(&pdev->dev);
if (ret < 0)
- goto eresume;
+ goto err_clk;
INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work);
@@ -1447,65 +1447,55 @@ static int sh_mmcif_probe(struct platform_device *pdev)
sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error";
- ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, name, host);
+ ret = devm_request_threaded_irq(&pdev->dev, irq[0], sh_mmcif_intr,
+ sh_mmcif_irqt, 0, name, host);
if (ret) {
dev_err(&pdev->dev, "request_irq error (%s)\n", name);
- goto ereqirq0;
+ goto err_clk;
}
if (irq[1] >= 0) {
- ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt,
- 0, "sh_mmc:int", host);
+ ret = devm_request_threaded_irq(&pdev->dev, irq[1],
+ sh_mmcif_intr, sh_mmcif_irqt,
+ 0, "sh_mmc:int", host);
if (ret) {
dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
- goto ereqirq1;
+ goto err_clk;
}
}
if (pd && pd->use_cd_gpio) {
ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0);
if (ret < 0)
- goto erqcd;
+ goto err_clk;
}
mutex_init(&host->thread_lock);
- clk_disable_unprepare(host->hclk);
ret = mmc_add_host(mmc);
if (ret < 0)
- goto emmcaddh;
+ goto err_clk;
dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
- dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
- dev_dbg(&pdev->dev, "chip ver H'%04x\n",
- sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff);
+ dev_info(&pdev->dev, "Chip version 0x%04x, clock rate %luMHz\n",
+ sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
+ clk_get_rate(host->hclk) / 1000000UL);
+
+ clk_disable_unprepare(host->hclk);
return ret;
-emmcaddh:
-erqcd:
- if (irq[1] >= 0)
- free_irq(irq[1], host);
-ereqirq1:
- free_irq(irq[0], host);
-ereqirq0:
- pm_runtime_suspend(&pdev->dev);
-eresume:
+err_clk:
clk_disable_unprepare(host->hclk);
-eclkupdate:
- clk_put(host->hclk);
-eclkget:
+err_pm:
pm_runtime_disable(&pdev->dev);
-eofparse:
+err_host:
mmc_free_host(mmc);
-ealloch:
- iounmap(reg);
return ret;
}
static int sh_mmcif_remove(struct platform_device *pdev)
{
struct sh_mmcif_host *host = platform_get_drvdata(pdev);
- int irq[2];
host->dying = true;
clk_prepare_enable(host->hclk);
@@ -1523,16 +1513,6 @@ static int sh_mmcif_remove(struct platform_device *pdev)
*/
cancel_delayed_work_sync(&host->timeout_work);
- if (host->addr)
- iounmap(host->addr);
-
- irq[0] = platform_get_irq(pdev, 0);
- irq[1] = platform_get_irq(pdev, 1);
-
- free_irq(irq[0], host);
- if (irq[1] >= 0)
- free_irq(irq[1], host);
-
clk_disable_unprepare(host->hclk);
mmc_free_host(host->mmc);
pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 03e7b280cb4..eb8f1d5c34b 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -294,6 +294,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
cfg.slave_id = pdata->dma->slave_id_tx;
cfg.direction = DMA_MEM_TO_DEV;
cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift);
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
cfg.src_addr = 0;
ret = dmaengine_slave_config(host->chan_tx, &cfg);
if (ret < 0)
@@ -312,6 +313,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
cfg.slave_id = pdata->dma->slave_id_rx;
cfg.direction = DMA_DEV_TO_MEM;
cfg.src_addr = cfg.dst_addr;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
cfg.dst_addr = 0;
ret = dmaengine_slave_config(host->chan_rx, &cfg);
if (ret < 0)
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 282891a8e45..54181b4f6e9 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -72,7 +72,6 @@
#define BM_SPI_CS 0x20
#define BM_SD_POWER 0x40
#define BM_SOFT_RESET 0x80
-#define BM_ONEBIT_MASK 0xFD
/* SDMMC_BLKLEN bit fields */
#define BLKL_CRCERR_ABORT 0x0800
@@ -120,6 +119,8 @@
#define STS2_DATARSP_BUSY 0x20
#define STS2_DIS_FORCECLK 0x80
+/* SDMMC_EXTCTRL bit fields */
+#define EXT_EIGHTBIT 0x04
/* MMC/SD DMA Controller Registers */
#define SDDMA_GCR 0x100
@@ -672,7 +673,7 @@ static void wmt_mci_request(struct mmc_host *mmc, struct mmc_request *req)
static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct wmt_mci_priv *priv;
- u32 reg_tmp;
+ u32 busmode, extctrl;
priv = mmc_priv(mmc);
@@ -687,28 +688,26 @@ static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (ios->clock != 0)
clk_set_rate(priv->clk_sdmmc, ios->clock);
+ busmode = readb(priv->sdmmc_base + SDMMC_BUSMODE);
+ extctrl = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
+
+ busmode &= ~(BM_EIGHTBIT_MODE | BM_FOURBIT_MODE);
+ extctrl &= ~EXT_EIGHTBIT;
+
switch (ios->bus_width) {
case MMC_BUS_WIDTH_8:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp | 0x04, priv->sdmmc_base + SDMMC_EXTCTRL);
+ busmode |= BM_EIGHTBIT_MODE;
+ extctrl |= EXT_EIGHTBIT;
break;
case MMC_BUS_WIDTH_4:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
- writeb(reg_tmp | BM_FOURBIT_MODE, priv->sdmmc_base +
- SDMMC_BUSMODE);
-
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
+ busmode |= BM_FOURBIT_MODE;
break;
case MMC_BUS_WIDTH_1:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
- writeb(reg_tmp & BM_ONEBIT_MASK, priv->sdmmc_base +
- SDMMC_BUSMODE);
-
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
break;
}
+
+ writeb(busmode, priv->sdmmc_base + SDMMC_BUSMODE);
+ writeb(extctrl, priv->sdmmc_base + SDMMC_EXTCTRL);
}
static int wmt_mci_get_ro(struct mmc_host *mmc)
@@ -830,7 +829,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
goto fail3;
}
- ret = request_irq(dma_irq, wmt_mci_dma_isr, 32, "sdmmc", priv);
+ ret = request_irq(dma_irq, wmt_mci_dma_isr, 0, "sdmmc", priv);
if (ret) {
dev_err(&pdev->dev, "Register DMA IRQ fail\n");
goto fail4;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index af7c40ac145..e1a8f4e1998 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -581,7 +581,11 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
struct xgene_enet_desc_ring *ring;
struct xgene_enet_pdata *pdata = netdev_priv(ndev);
struct device *dev = ndev_to_dev(ndev);
- u32 size;
+ int size;
+
+ size = xgene_enet_get_ring_size(dev, cfgsize);
+ if (size < 0)
+ return NULL;
ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring),
GFP_KERNEL);
@@ -593,7 +597,6 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
ring->cfgsize = cfgsize;
ring->id = ring_id;
- size = xgene_enet_get_ring_size(dev, cfgsize);
ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma,
GFP_KERNEL);
if (!ring->desc_addr) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a3dd5dc64f4..4296b3d26f0 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14093,8 +14093,9 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
spin_lock_bh(&tp->lock);
if (!tp->hw_stats) {
+ *stats = tp->net_stats_prev;
spin_unlock_bh(&tp->lock);
- return &tp->net_stats_prev;
+ return stats;
}
tg3_get_nstats(tp, stats);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 811f1351db7..43e08d0bc3d 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -897,5 +897,6 @@ void be_roce_dev_remove(struct be_adapter *);
*/
void be_roce_dev_open(struct be_adapter *);
void be_roce_dev_close(struct be_adapter *);
+void be_roce_dev_shutdown(struct be_adapter *);
#endif /* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index db4ff14ff18..9cdeda54674 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5014,6 +5014,7 @@ static void be_shutdown(struct pci_dev *pdev)
if (!adapter)
return;
+ be_roce_dev_shutdown(adapter);
cancel_delayed_work_sync(&adapter->work);
cancel_delayed_work_sync(&adapter->func_recovery_work);
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 5bf16603a3e..ef4672dc735 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter)
{
if (ocrdma_drv && adapter->ocrdma_dev &&
ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0);
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_UP);
}
void be_roce_dev_open(struct be_adapter *adapter)
@@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter)
{
if (ocrdma_drv && adapter->ocrdma_dev &&
ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1);
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_DOWN);
}
void be_roce_dev_close(struct be_adapter *adapter)
@@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter)
}
}
+void be_roce_dev_shutdown(struct be_adapter *adapter)
+{
+ if (be_roce_supported(adapter)) {
+ mutex_lock(&be_adapter_list_lock);
+ if (ocrdma_drv && adapter->ocrdma_dev &&
+ ocrdma_drv->state_change_handler)
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_SHUTDOWN);
+ mutex_unlock(&be_adapter_list_lock);
+ }
+}
+
int be_roce_register_driver(struct ocrdma_driver *drv)
{
struct be_adapter *dev;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index a3d9e96c18e..e6f7eb1a7d8 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -62,7 +62,8 @@ struct ocrdma_driver {
enum {
BE_DEV_UP = 0,
- BE_DEV_DOWN = 1
+ BE_DEV_DOWN = 1,
+ BE_DEV_SHUTDOWN = 2
};
/* APIs for RoCE driver to register callback handlers,
diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile
index 775d9969b5c..cd473e29524 100644
--- a/drivers/net/ethernet/ibm/ehea/Makefile
+++ b/drivers/net/ethernet/ibm/ehea/Makefile
@@ -1,6 +1,6 @@
#
# Makefile for the eHEA ethernet device driver for IBM eServer System p
#
-ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o
+ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o
obj-$(CONFIG_EHEA) += ehea.o
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index 58856032298..06edfca1a35 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -47,7 +47,7 @@ static u8 e1000_calculate_checksum(u8 *buffer, u32 length)
* e1000_mng_enable_host_if - Checks host interface is enabled
* @hw: pointer to the HW structure
*
- * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ * Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
*
* This function checks whether the HOST IF is enabled for command operation
* and also checks whether the previous command is completed. It busy waits
@@ -78,7 +78,7 @@ static s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
}
if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
- e_dbg("Previous command timeout failed .\n");
+ e_dbg("Previous command timeout failed.\n");
return -E1000_ERR_HOST_INTERFACE_COMMAND;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 6938fc1ad87..5d01db1d789 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -33,6 +33,7 @@
#include <scsi/fc/fc_fcoe.h>
#include <scsi/libfc.h>
#include <scsi/libfcoe.h>
+#include <uapi/linux/dcbnl.h>
#include "i40e.h"
#include "i40e_fcoe.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 51bc03072ed..871474f6fe6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4415,13 +4415,13 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
switch (vsi->back->hw.phy.link_info.link_speed) {
case I40E_LINK_SPEED_40GB:
- strncpy(speed, "40 Gbps", SPEED_SIZE);
+ strlcpy(speed, "40 Gbps", SPEED_SIZE);
break;
case I40E_LINK_SPEED_10GB:
- strncpy(speed, "10 Gbps", SPEED_SIZE);
+ strlcpy(speed, "10 Gbps", SPEED_SIZE);
break;
case I40E_LINK_SPEED_1GB:
- strncpy(speed, "1000 Mbps", SPEED_SIZE);
+ strlcpy(speed, "1000 Mbps", SPEED_SIZE);
break;
default:
break;
@@ -4429,16 +4429,16 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
switch (vsi->back->hw.fc.current_mode) {
case I40E_FC_FULL:
- strncpy(fc, "RX/TX", FC_SIZE);
+ strlcpy(fc, "RX/TX", FC_SIZE);
break;
case I40E_FC_TX_PAUSE:
- strncpy(fc, "TX", FC_SIZE);
+ strlcpy(fc, "TX", FC_SIZE);
break;
case I40E_FC_RX_PAUSE:
- strncpy(fc, "RX", FC_SIZE);
+ strlcpy(fc, "RX", FC_SIZE);
break;
default:
- strncpy(fc, "None", FC_SIZE);
+ strlcpy(fc, "None", FC_SIZE);
break;
}
@@ -5839,7 +5839,7 @@ static void i40e_send_version(struct i40e_pf *pf)
dv.minor_version = DRV_VERSION_MINOR;
dv.build_version = DRV_VERSION_BUILD;
dv.subbuild_version = 0;
- strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
+ strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
}
@@ -6293,7 +6293,7 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
if (alloc_qvectors) {
/* allocate memory for q_vector pointers */
- size = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors;
+ size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
vsi->q_vectors = kzalloc(size, GFP_KERNEL);
if (!vsi->q_vectors) {
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 97bda3dffd4..25c4f9a3011 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -251,9 +251,9 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
*
* Writes a 16 bit words buffer to the Shadow RAM using the admin command.
**/
-i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
- u32 offset, u16 words, void *data,
- bool last_command)
+static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 words, void *data,
+ bool last_command)
{
i40e_status ret_code = I40E_ERR_NVM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5d940a26055..65a4a0f88ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = {
.wrapper = mlx4_MAD_IFC_wrapper
},
{
+ .opcode = MLX4_CMD_MAD_DEMUX,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
+ {
.opcode = MLX4_CMD_QUERY_IF_STAT,
.has_inbox = false,
.has_outbox = true,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 688e1eabab2..494753e44ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[7] = "FSM (MAC anti-spoofing) support",
[8] = "Dynamic QP updates support",
[9] = "Device managed flow steering IPoIB support",
- [10] = "TCP/IP offloads/flow-steering for VXLAN support"
+ [10] = "TCP/IP offloads/flow-steering for VXLAN support",
+ [11] = "MAD DEMUX (Secure-Host) support"
};
int i;
@@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
#define QUERY_DEV_CAP_VXLAN 0x9e
+#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(dev_cap->max_counters, outbox,
QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
+ MLX4_GET(field32, outbox,
+ QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
+ if (field32 & (1 << 0))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
+
MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
if (field32 & (1 << 16))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work)
out:
mlx4_free_cmd_mailbox(dev, mailbox);
}
+
+static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10
+#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20
+#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40
+#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70
+
+ u32 set_attr_mask, getresp_attr_mask;
+ u32 trap_attr_mask, traprepress_attr_mask;
+
+ MLX4_GET(set_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
+ set_attr_mask);
+
+ MLX4_GET(getresp_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
+ getresp_attr_mask);
+
+ MLX4_GET(trap_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
+ trap_attr_mask);
+
+ MLX4_GET(traprepress_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
+ traprepress_attr_mask);
+
+ if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
+ traprepress_attr_mask)
+ return 1;
+
+ return 0;
+}
+
+int mlx4_config_mad_demux(struct mlx4_dev *dev)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int secure_host_active;
+ int err;
+
+ /* Check if mad_demux is supported */
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
+ return -ENOMEM;
+ }
+
+ /* Query mad_demux to find out which MADs are handled by internal sma */
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
+ err);
+ goto out;
+ }
+
+ secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
+
+ /* Config mad_demux to handle all MADs returned by the query above */
+ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
+ goto out;
+ }
+
+ if (secure_host_active)
+ mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 80b8c5f30e4..0158689906f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1853,6 +1853,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
goto err_mr_table_free;
}
+ err = mlx4_config_mad_demux(dev);
+ if (err) {
+ mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
+ goto err_mcg_table_free;
+ }
}
err = mlx4_init_eq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 13fbcd03c3e..b508c7887ef 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -274,6 +274,8 @@ struct mlx4_icm_table {
#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
#define MLX4_MPT_FLAG_REGION (1 << 8)
+#define MLX4_MPT_PD_MASK (0x1FFFFUL)
+#define MLX4_MPT_PD_VF_MASK (0xFE0000UL)
#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
@@ -1306,5 +1308,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev);
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
/* Returns the VF index of slave */
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
+int mlx4_config_mad_demux(struct mlx4_dev *dev);
#endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2839abb878a..7d717eccb7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry ***mpt_entry)
+{
+ int err;
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+
+ /* Make sure that at this point we have single-threaded access only */
+
+ if (mmr->enabled != MLX4_MPT_EN_HW)
+ return -EINVAL;
+
+ err = mlx4_HW2SW_MPT(dev, NULL, key);
+
+ if (err) {
+ mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+ mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+ return err;
+ }
+
+ mmr->enabled = MLX4_MPT_EN_SW;
+
+ if (!mlx4_is_mfunc(dev)) {
+ **mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ key, NULL);
+ } else {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR_OR_NULL(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
+ 0, MLX4_CMD_QUERY_MPT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ if (err)
+ goto free_mailbox;
+
+ *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
+ }
+
+ if (!(*mpt_entry) || !(**mpt_entry)) {
+ err = -ENOMEM;
+ goto free_mailbox;
+ }
+
+ return 0;
+
+free_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
+
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ int err;
+
+ if (!mlx4_is_mfunc(dev)) {
+ /* Make sure any changes to this entry are flushed */
+ wmb();
+
+ *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
+
+ /* Make sure the new status is written */
+ wmb();
+
+ err = mlx4_SYNC_TPT(dev);
+ } else {
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+
+ err = mlx4_SW2HW_MPT(dev, mailbox, key);
+ }
+
+ mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
+ if (!err)
+ mmr->enabled = MLX4_MPT_EN_HW;
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
+
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ if (mlx4_is_mfunc(dev)) {
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
+
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+ u32 pdn)
+{
+ u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
+ /* The wrapper function will put the slave's id here */
+ if (mlx4_is_mfunc(dev))
+ pd_flags &= ~MLX4_MPT_PD_VF_MASK;
+ mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) |
+ (pdn & MLX4_MPT_PD_MASK)
+ | MLX4_MPT_PD_FLAG_EN_INV);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
+
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry *mpt_entry,
+ u32 access)
+{
+ u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
+ (access & MLX4_PERM_MASK);
+
+ mpt_entry->flags = cpu_to_be32(flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
+
static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
u64 iova, u64 size, u32 access, int npages,
int page_shift, struct mlx4_mr *mr)
@@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+ u64 iova, u64 size, int npages,
+ int page_shift, struct mlx4_mpt_entry *mpt_entry)
+{
+ int err;
+
+ mpt_entry->start = cpu_to_be64(mr->iova);
+ mpt_entry->length = cpu_to_be64(mr->size);
+ mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
+ err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (err)
+ return err;
+
+ if (mr->mtt.order < 0) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+ mpt_entry->mtt_addr = 0;
+ } else {
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
+ if (mr->mtt.page_shift == 0)
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
+ }
+ mr->enabled = MLX4_MPT_EN_SW;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
+
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0efc1368e5a..1089367fed2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
- if (mpt->com.from_state != RES_MPT_HW) {
+ if (mpt->com.from_state == RES_MPT_MAPPED) {
+ /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
+ * that, the VF must read the MPT. But since the MPT entry memory is not
+ * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
+ * entry contents. To guarantee that the MPT cannot be changed, the driver
+ * must perform HW2SW_MPT before this query and return the MPT entry to HW
+ * ownership fofollowing the change. The change here allows the VF to
+ * perform QUERY_MPT also when the entry is in SW ownership.
+ */
+ struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ mpt->key, NULL);
+
+ if (NULL == mpt_entry || NULL == outbox->buf) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
+
+ err = 0;
+ } else if (mpt->com.from_state == RES_MPT_HW) {
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ } else {
err = -EBUSY;
goto out;
}
- err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
out:
put_res(dev, slave, id, RES_MPT);
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 69c26f04d8c..679db026f4b 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -873,6 +873,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
return -ENOMEM;
dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+ __free_page(dmatest_page);
+ return -ENOMEM;
+ }
/* Run a small DMA test.
* The magic multipliers to the length tell the firmware
@@ -1294,6 +1298,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
int bytes, int watchdog)
{
struct page *page;
+ dma_addr_t bus;
int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
int end_offset;
@@ -1318,11 +1323,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
rx->watchdog_needed = 1;
return;
}
+
+ bus = pci_map_page(mgp->pdev, page, 0,
+ MYRI10GE_ALLOC_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ __free_pages(page, MYRI10GE_ALLOC_ORDER);
+ if (rx->fill_cnt - rx->cnt < 16)
+ rx->watchdog_needed = 1;
+ return;
+ }
+
rx->page = page;
rx->page_offset = 0;
- rx->bus = pci_map_page(mgp->pdev, page, 0,
- MYRI10GE_ALLOC_SIZE,
- PCI_DMA_FROMDEVICE);
+ rx->bus = bus;
+
}
rx->info[idx].page = rx->page;
rx->info[idx].page_offset = rx->page_offset;
@@ -2764,6 +2779,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
mb();
}
+static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
+ struct myri10ge_tx_buf *tx, int idx)
+{
+ unsigned int len;
+ int last_idx;
+
+ /* Free any DMA resources we've alloced and clear out the skb slot */
+ last_idx = (idx + 1) & tx->mask;
+ idx = tx->req & tx->mask;
+ do {
+ len = dma_unmap_len(&tx->info[idx], len);
+ if (len) {
+ if (tx->info[idx].skb != NULL)
+ pci_unmap_single(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ else
+ pci_unmap_page(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ dma_unmap_len_set(&tx->info[idx], len, 0);
+ tx->info[idx].skb = NULL;
+ }
+ idx = (idx + 1) & tx->mask;
+ } while (idx != last_idx);
+}
+
/*
* Transmit a packet. We need to split the packet so that a single
* segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2787,7 +2831,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
u32 low;
__be32 high_swapped;
unsigned int len;
- int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+ int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
@@ -2884,9 +2928,12 @@ again:
/* map the skb for DMA */
len = skb_headlen(skb);
+ bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+ goto drop;
+
idx = tx->req & tx->mask;
tx->info[idx].skb = skb;
- bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);
@@ -2985,12 +3032,16 @@ again:
break;
/* map next fragment for DMA */
- idx = (count + tx->req) & tx->mask;
frag = &skb_shinfo(skb)->frags[frag_idx];
frag_idx++;
len = skb_frag_size(frag);
bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
DMA_TO_DEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ myri10ge_unmap_tx_dma(mgp, tx, idx);
+ goto drop;
+ }
+ idx = (count + tx->req) & tx->mask;
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);
}
@@ -3021,31 +3072,8 @@ again:
return NETDEV_TX_OK;
abort_linearize:
- /* Free any DMA resources we've alloced and clear out the skb
- * slot so as to not trip up assertions, and to avoid a
- * double-free if linearizing fails */
+ myri10ge_unmap_tx_dma(mgp, tx, idx);
- last_idx = (idx + 1) & tx->mask;
- idx = tx->req & tx->mask;
- tx->info[idx].skb = NULL;
- do {
- len = dma_unmap_len(&tx->info[idx], len);
- if (len) {
- if (tx->info[idx].skb != NULL)
- pci_unmap_single(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- else
- pci_unmap_page(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- dma_unmap_len_set(&tx->info[idx], len, 0);
- tx->info[idx].skb = NULL;
- }
- idx = (idx + 1) & tx->mask;
- } while (idx != last_idx);
if (skb_is_gso(skb)) {
netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
goto drop;
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index d813bfb1a84..23c89ab5a6a 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -32,6 +32,11 @@ MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);
+/* Heuristic for the number of times to exponentially backoff and
+ * retry sending an LDC trigger when EAGAIN is encountered
+ */
+#define VNET_MAX_RETRIES 10
+
/* Ordered from largest major to lowest */
static struct vio_version vnet_versions[] = {
{ .major = 1, .minor = 0 },
@@ -260,6 +265,7 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
.state = vio_dring_state,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -272,6 +278,13 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VNET_MAX_RETRIES) {
+ pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
+ port->raddr[0], port->raddr[1],
+ port->raddr[2], port->raddr[3],
+ port->raddr[4], port->raddr[5]);
+ err = -ECONNRESET;
+ }
} while (err == -EAGAIN);
return err;
@@ -475,8 +488,9 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
return 0;
}
-static void maybe_tx_wakeup(struct vnet *vp)
+static void maybe_tx_wakeup(unsigned long param)
{
+ struct vnet *vp = (struct vnet *)param;
struct net_device *dev = vp->dev;
netif_tx_lock(dev);
@@ -573,8 +587,13 @@ static void vnet_event(void *arg, int event)
break;
}
spin_unlock(&vio->lock);
+ /* Kick off a tasklet to wake the queue. We cannot call
+ * maybe_tx_wakeup directly here because we could deadlock on
+ * netif_tx_lock() with dev_watchdog()
+ */
if (unlikely(tx_wakeup && err != -ECONNRESET))
- maybe_tx_wakeup(port->vp);
+ tasklet_schedule(&port->vp->vnet_tx_wakeup);
+
local_irq_restore(flags);
}
@@ -593,6 +612,7 @@ static int __vnet_tx_trigger(struct vnet_port *port)
.end_idx = (u32) -1,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -605,6 +625,8 @@ static int __vnet_tx_trigger(struct vnet_port *port)
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VNET_MAX_RETRIES)
+ break;
} while (err == -EAGAIN);
return err;
@@ -691,7 +713,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
}
- d->hdr.ack = VIO_ACK_ENABLE;
+ /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
+ * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
+ * the protocol itself does not require it as long as the peer
+ * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
+ *
+ * An ACK for every packet in the ring is expensive as the
+ * sending of LDC messages is slow and affects performance.
+ */
+ d->hdr.ack = VIO_ACK_DISABLE;
d->size = len;
d->ncookies = port->tx_bufs[dr->prod].ncookies;
for (i = 0; i < d->ncookies; i++)
@@ -1046,6 +1076,7 @@ static struct vnet *vnet_new(const u64 *local_mac)
vp = netdev_priv(dev);
spin_lock_init(&vp->lock);
+ tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
vp->dev = dev;
INIT_LIST_HEAD(&vp->port_list);
@@ -1105,6 +1136,7 @@ static void vnet_cleanup(void)
vp = list_first_entry(&vnet_list, struct vnet, list);
list_del(&vp->list);
dev = vp->dev;
+ tasklet_kill(&vp->vnet_tx_wakeup);
/* vio_unregister_driver() should have cleaned up port_list */
BUG_ON(!list_empty(&vp->port_list));
unregister_netdev(dev);
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index d347a5bf24b..de5c2c64996 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -1,6 +1,8 @@
#ifndef _SUNVNET_H
#define _SUNVNET_H
+#include <linux/interrupt.h>
+
#define DESC_NCOOKIES(entry_size) \
((entry_size) - sizeof(struct vio_net_desc))
@@ -78,6 +80,8 @@ struct vnet {
struct list_head list;
u64 local_mac;
+
+ struct tasklet_struct vnet_tx_wakeup;
};
#endif /* _SUNVNET_H */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 36f4459520c..fda5891835d 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1170,7 +1170,6 @@ static struct platform_driver temac_of_driver = {
.probe = temac_of_probe,
.remove = temac_of_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "xilinx_temac",
.of_match_table = temac_of_match,
},
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 30e8608ff05..c8fd94133ec 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1645,7 +1645,6 @@ static struct platform_driver axienet_of_driver = {
.probe = axienet_of_probe,
.remove = axienet_of_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "xilinx_axienet",
.of_match_table = axienet_of_match,
},
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 782bb9373cd..28dbbdc393e 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1245,7 +1245,6 @@ MODULE_DEVICE_TABLE(of, xemaclite_of_match);
static struct platform_driver xemaclite_of_driver = {
.driver = {
.name = DRIVER_NAME,
- .owner = THIS_MODULE,
.of_match_table = xemaclite_of_match,
},
.probe = xemaclite_of_probe,
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a931..6d3e2093bf7 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ef8a5c20236..60e4ca01ccb 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -45,10 +45,9 @@ struct macvlan_port {
struct sk_buff_head bc_queue;
struct work_struct bc_work;
bool passthru;
+ int count;
};
-#define MACVLAN_PORT_IS_EMPTY(port) list_empty(&port->vlans)
-
struct macvlan_skb_cb {
const struct macvlan_dev *src;
};
@@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_device *dev)
free_percpu(vlan->pcpu_stats);
- if (MACVLAN_PORT_IS_EMPTY(port))
+ port->count -= 1;
+ if (!port->count)
macvlan_port_destroy(port->dev);
}
@@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
- if (!MACVLAN_PORT_IS_EMPTY(port))
+ if (port->count)
return -EINVAL;
port->passthru = true;
eth_hw_addr_inherit(dev, lowerdev);
}
+ port->count += 1;
err = register_netdevice(dev);
if (err < 0)
goto destroy_port;
@@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
unregister_netdev:
unregister_netdevice(dev);
destroy_port:
- if (MACVLAN_PORT_IS_EMPTY(port))
+ port->count -= 1;
+ if (!port->count)
macvlan_port_destroy(lowerdev);
return err;
diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 8596aba34f9..237d0cda1bc 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -256,6 +256,7 @@ struct ar9170 {
atomic_t rx_work_urbs;
atomic_t rx_pool_urbs;
kernel_ulong_t features;
+ bool usb_ep_cmd_is_bulk;
/* firmware settings */
struct completion fw_load_wait;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index f35c7f30f9a..c9f93310c0d 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd,
goto err_free;
}
- usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev,
- AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4,
- carl9170_usb_cmd_complete, ar, 1);
+ if (ar->usb_ep_cmd_is_bulk)
+ usb_fill_bulk_urb(urb, ar->udev,
+ usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD),
+ cmd, cmd->hdr.len + 4,
+ carl9170_usb_cmd_complete, ar);
+ else
+ usb_fill_int_urb(urb, ar->udev,
+ usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD),
+ cmd, cmd->hdr.len + 4,
+ carl9170_usb_cmd_complete, ar, 1);
if (free_buf)
urb->transfer_flags |= URB_FREE_BUFFER;
@@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw,
static int carl9170_usb_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
+ struct usb_endpoint_descriptor *ep;
struct ar9170 *ar;
struct usb_device *udev;
- int err;
+ int i, err;
err = usb_reset_device(interface_to_usbdev(intf));
if (err)
@@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf,
ar->intf = intf;
ar->features = id->driver_info;
+ /* We need to remember the type of endpoint 4 because it differs
+ * between high- and full-speed configuration. The high-speed
+ * configuration specifies it as interrupt and the full-speed
+ * configuration as bulk endpoint. This information is required
+ * later when sending urbs to that endpoint.
+ */
+ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) {
+ ep = &intf->cur_altsetting->endpoint[i].desc;
+
+ if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD &&
+ usb_endpoint_dir_out(ep) &&
+ usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK)
+ ar->usb_ep_cmd_is_bulk = true;
+ }
+
usb_set_intfdata(intf, ar);
SET_IEEE80211_DEV(ar->hw, &intf->dev);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
index 535c7eb01b3..8f8b9373de9 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
@@ -1318,6 +1318,8 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings;
msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings *
sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC);
+ if (!msgbuf->flowring_dma_handle)
+ goto fail;
msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset;
msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost;
@@ -1362,6 +1364,7 @@ fail:
kfree(msgbuf->flow_map);
kfree(msgbuf->txstatus_done_map);
brcmf_msgbuf_release_pktids(msgbuf);
+ kfree(msgbuf->flowring_dma_handle);
if (msgbuf->ioctbuf)
dma_free_coherent(drvr->bus_if->dev,
BRCMF_TX_IOCTL_MAX_MSG_SIZE,
@@ -1391,6 +1394,7 @@ void brcmf_proto_msgbuf_detach(struct brcmf_pub *drvr)
BRCMF_TX_IOCTL_MAX_MSG_SIZE,
msgbuf->ioctbuf, msgbuf->ioctbuf_handle);
brcmf_msgbuf_release_pktids(msgbuf);
+ kfree(msgbuf->flowring_dma_handle);
kfree(msgbuf);
drvr->proto->pd = NULL;
}
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
index bc972c0ba5f..e5101b287e4 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
@@ -591,12 +591,13 @@ static void brcmf_pcie_handle_mb_data(struct brcmf_pciedev_info *devinfo)
}
if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE)
brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n");
- if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK)
+ if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) {
brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n");
if (waitqueue_active(&devinfo->mbdata_resp_wait)) {
devinfo->mbdata_completed = true;
wake_up(&devinfo->mbdata_resp_wait);
}
+ }
}
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index c5aa404069f..389656bd1a7 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -9853,6 +9853,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev,
strncpy(extra, "unknown", MAX_WX_STRING);
break;
}
+ extra[MAX_WX_STRING - 1] = '\0';
IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra);
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 0d6a8b768a6..7c8796584c2 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -396,7 +396,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
else
hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
- hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
+ /* TODO: enable that only for firmwares that don't crash */
+ /* hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; */
hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
/* we create the 802.11 header and zero length SSID IE. */
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f46a3..d4eb8d2e9cb 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
+ atomic_t inflight_packets;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues;
extern struct dentry *xen_netback_dbg_root;
#endif
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bfd10cb9c8d..e29e15dca86 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+ atomic_dec(&queue->inflight_packets);
+}
+
static inline void xenvif_stop_queue(struct xenvif_queue *queue)
{
struct net_device *dev = queue->vif->dev;
@@ -524,9 +541,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->rx_stalled);
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
-
return 0;
}
@@ -560,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->wq);
init_waitqueue_head(&queue->dealloc_wq);
+ atomic_set(&queue->inflight_packets, 0);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -614,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
wake_up_process(queue->task);
wake_up_process(queue->dealloc_task);
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
+
return 0;
err_rx_unbind:
@@ -642,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif)
rtnl_unlock();
}
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
- unsigned int worst_case_skb_lifetime)
-{
- int i, unmap_timeout = 0;
-
- for (i = 0; i < MAX_PENDING_REQS; ++i) {
- if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
- unmap_timeout++;
- schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > worst_case_skb_lifetime &&
- net_ratelimit())
- netdev_err(queue->vif->dev,
- "Page still granted! Index: %x\n",
- i);
- i = -1;
- }
- }
-}
-
void xenvif_disconnect(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
@@ -672,6 +671,8 @@ void xenvif_disconnect(struct xenvif *vif)
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
+ netif_napi_del(&queue->napi);
+
if (queue->task) {
del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
@@ -704,7 +705,6 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
- netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif)
@@ -712,21 +712,11 @@ void xenvif_free(struct xenvif *vif)
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- /* Here we want to avoid timeout messages if an skb can be legitimately
- * stuck somewhere else. Realistically this could be an another vif's
- * internal or QDisc queue. That another vif also has this
- * rx_drain_timeout_msecs timeout, so give it time to drain out.
- * Although if that other guest wakes up just before its timeout happens
- * and takes only one skb from QDisc, it can hold onto other skbs for a
- * longer period.
- */
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
unregister_netdev(vif->dev);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
- xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
xenvif_deinit_queue(queue);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4734472aa62..08f65996534 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
/* remove traces of mapped pages and frag_list */
skb_frag_list_init(skb);
uarg = skb_shinfo(skb)->destructor_arg;
+ /* increase inflight counter to offset decrement in callback */
+ atomic_inc(&queue->inflight_packets);
uarg->callback(uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
- skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, nskb);
kfree_skb(nskb);
return 0;
@@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
if (net_ratelimit())
netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
* skb. E.g. the __pskb_pull_tail earlier can do such thing.
*/
if (skb_shinfo(skb)->destructor_arg) {
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
@@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
queue->stats.tx_zerocopy_success++;
else
queue->stats.tx_zerocopy_fail++;
+ xenvif_skb_zerocopy_complete(queue);
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -2058,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
return 0;
}
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+ /* Dealloc thread must remain running until all inflight
+ * packets complete.
+ */
+ return kthread_should_stop() &&
+ !atomic_read(&queue->inflight_packets);
+}
+
int xenvif_dealloc_kthread(void *data)
{
struct xenvif_queue *queue = data;
- while (!kthread_should_stop()) {
+ for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
- kthread_should_stop());
- if (kthread_should_stop())
+ xenvif_dealloc_kthread_should_stop(queue));
+ if (xenvif_dealloc_kthread_should_stop(queue))
break;
xenvif_tx_dealloc_action(queue);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d857b..9c47b897b6d 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
}
#define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE 32
static ssize_t
xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
struct xenvif_queue *queue =
((struct seq_file *)filp->private_data)->private;
int len;
- char write[sizeof(XENVIF_KICK_STR)];
+ char write[BUFFER_SIZE];
/* don't allow partial writes and check the length */
if (*ppos != 0)
return 0;
- if (count < sizeof(XENVIF_KICK_STR) - 1)
+ if (count >= sizeof(write))
return -ENOSPC;
len = simple_write_to_buffer(write,
- sizeof(write),
+ sizeof(write) - 1,
ppos,
buf,
count);
if (len < 0)
return len;
+ write[len] = '\0';
+
if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
xenvif_interrupt(0, (void *)queue);
else {
@@ -171,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
.write = xenvif_write_io_ring,
};
-static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+static void xenvif_debugfs_addif(struct xenvif *vif)
{
struct dentry *pfile;
- struct xenvif *vif = queue->vif;
int i;
if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -733,10 +735,11 @@ static void connect(struct backend_info *be)
be->vif->num_queues = queue_index;
goto err;
}
+ }
+
#ifdef CONFIG_DEBUG_FS
- xenvif_debugfs_addif(queue);
+ xenvif_debugfs_addif(be->vif);
#endif /* CONFIG_DEBUG_FS */
- }
/* Initialisation completed, tell core driver the number of
* active queues.
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 2dcb0541012..5160c4eb73c 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -9,7 +9,8 @@ menu "Device Tree and Open Firmware support"
config OF_SELFTEST
bool "Device Tree Runtime self tests"
- depends on OF_IRQ
+ depends on OF_IRQ && OF_EARLY_FLATTREE
+ select OF_DYNAMIC
help
This option builds in test cases for the device tree infrastructure
that are executed once at boot time, and the results dumped to the
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 099b1fb00af..2b6a7b129d1 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,11 +1,13 @@
obj-y = base.o device.o platform.o
+obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
obj-$(CONFIG_OF_FLATTREE) += fdt.o
obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
obj-$(CONFIG_OF_PROMTREE) += pdt.o
obj-$(CONFIG_OF_ADDRESS) += address.o
obj-$(CONFIG_OF_IRQ) += irq.o
obj-$(CONFIG_OF_NET) += of_net.o
-obj-$(CONFIG_OF_SELFTEST) += selftest.o
+obj-$(CONFIG_OF_SELFTEST) += of_selftest.o
+of_selftest-objs := selftest.o testcase-data/testcases.dtb.o
obj-$(CONFIG_OF_MDIO) += of_mdio.o
obj-$(CONFIG_OF_PCI) += of_pci.o
obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index b9864806e9b..d8574adf0d6 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,6 +17,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/cpu.h>
#include <linux/module.h>
@@ -35,15 +36,17 @@ struct device_node *of_allnodes;
EXPORT_SYMBOL(of_allnodes);
struct device_node *of_chosen;
struct device_node *of_aliases;
-static struct device_node *of_stdout;
+struct device_node *of_stdout;
-static struct kset *of_kset;
+struct kset *of_kset;
/*
- * Used to protect the of_aliases; but also overloaded to hold off addition of
- * nodes to sysfs
+ * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
+ * This mutex must be held whenever modifications are being made to the
+ * device tree. The of_{attach,detach}_node() and
+ * of_{add,remove,update}_property() helpers make sure this happens.
*/
-DEFINE_MUTEX(of_aliases_mutex);
+DEFINE_MUTEX(of_mutex);
/* use when traversing tree through the allnext, child, sibling,
* or parent members of struct device_node.
@@ -89,79 +92,7 @@ int __weak of_node_to_nid(struct device_node *np)
}
#endif
-#if defined(CONFIG_OF_DYNAMIC)
-/**
- * of_node_get - Increment refcount of a node
- * @node: Node to inc refcount, NULL is supported to
- * simplify writing of callers
- *
- * Returns node.
- */
-struct device_node *of_node_get(struct device_node *node)
-{
- if (node)
- kobject_get(&node->kobj);
- return node;
-}
-EXPORT_SYMBOL(of_node_get);
-
-static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
-{
- return container_of(kobj, struct device_node, kobj);
-}
-
-/**
- * of_node_release - release a dynamically allocated node
- * @kref: kref element of the node to be released
- *
- * In of_node_put() this function is passed to kref_put()
- * as the destructor.
- */
-static void of_node_release(struct kobject *kobj)
-{
- struct device_node *node = kobj_to_device_node(kobj);
- struct property *prop = node->properties;
-
- /* We should never be releasing nodes that haven't been detached. */
- if (!of_node_check_flag(node, OF_DETACHED)) {
- pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
- dump_stack();
- return;
- }
-
- if (!of_node_check_flag(node, OF_DYNAMIC))
- return;
-
- while (prop) {
- struct property *next = prop->next;
- kfree(prop->name);
- kfree(prop->value);
- kfree(prop);
- prop = next;
-
- if (!prop) {
- prop = node->deadprops;
- node->deadprops = NULL;
- }
- }
- kfree(node->full_name);
- kfree(node->data);
- kfree(node);
-}
-
-/**
- * of_node_put - Decrement refcount of a node
- * @node: Node to dec refcount, NULL is supported to
- * simplify writing of callers
- *
- */
-void of_node_put(struct device_node *node)
-{
- if (node)
- kobject_put(&node->kobj);
-}
-EXPORT_SYMBOL(of_node_put);
-#else
+#ifndef CONFIG_OF_DYNAMIC
static void of_node_release(struct kobject *kobj)
{
/* Without CONFIG_OF_DYNAMIC, no nodes gets freed */
@@ -200,13 +131,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
return name;
}
-static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
+int __of_add_property_sysfs(struct device_node *np, struct property *pp)
{
int rc;
/* Important: Don't leak passwords */
bool secure = strncmp(pp->name, "security-", 9) == 0;
+ if (!of_kset || !of_node_is_attached(np))
+ return 0;
+
sysfs_bin_attr_init(&pp->attr);
pp->attr.attr.name = safe_name(&np->kobj, pp->name);
pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO;
@@ -218,12 +152,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
return rc;
}
-static int __of_node_add(struct device_node *np)
+int __of_attach_node_sysfs(struct device_node *np)
{
const char *name;
struct property *pp;
int rc;
+ if (!of_kset)
+ return 0;
+
np->kobj.kset = of_kset;
if (!np->parent) {
/* Nodes without parents are new top level trees */
@@ -245,59 +182,20 @@ static int __of_node_add(struct device_node *np)
return 0;
}
-int of_node_add(struct device_node *np)
-{
- int rc = 0;
-
- BUG_ON(!of_node_is_initialized(np));
-
- /*
- * Grab the mutex here so that in a race condition between of_init() and
- * of_node_add(), node addition will still be consistent.
- */
- mutex_lock(&of_aliases_mutex);
- if (of_kset)
- rc = __of_node_add(np);
- else
- /* This scenario may be perfectly valid, but report it anyway */
- pr_info("of_node_add(%s) before of_init()\n", np->full_name);
- mutex_unlock(&of_aliases_mutex);
- return rc;
-}
-
-#if defined(CONFIG_OF_DYNAMIC)
-static void of_node_remove(struct device_node *np)
-{
- struct property *pp;
-
- BUG_ON(!of_node_is_initialized(np));
-
- /* only remove properties if on sysfs */
- if (of_node_is_attached(np)) {
- for_each_property_of_node(np, pp)
- sysfs_remove_bin_file(&np->kobj, &pp->attr);
- kobject_del(&np->kobj);
- }
-
- /* finally remove the kobj_init ref */
- of_node_put(np);
-}
-#endif
-
static int __init of_init(void)
{
struct device_node *np;
/* Create the kset, and register existing nodes */
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
if (!of_kset) {
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
return -ENOMEM;
}
for_each_of_allnodes(np)
- __of_node_add(np);
- mutex_unlock(&of_aliases_mutex);
+ __of_attach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
/* Symlink in /proc as required by userspace ABI */
if (of_allnodes)
@@ -369,8 +267,8 @@ EXPORT_SYMBOL(of_find_all_nodes);
* Find a property with a given name for a given node
* and return the value.
*/
-static const void *__of_get_property(const struct device_node *np,
- const char *name, int *lenp)
+const void *__of_get_property(const struct device_node *np,
+ const char *name, int *lenp)
{
struct property *pp = __of_find_property(np, name, lenp);
@@ -1748,32 +1646,10 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na
}
EXPORT_SYMBOL(of_count_phandle_with_args);
-#if defined(CONFIG_OF_DYNAMIC)
-static int of_property_notify(int action, struct device_node *np,
- struct property *prop)
-{
- struct of_prop_reconfig pr;
-
- /* only call notifiers if the node is attached */
- if (!of_node_is_attached(np))
- return 0;
-
- pr.dn = np;
- pr.prop = prop;
- return of_reconfig_notify(action, &pr);
-}
-#else
-static int of_property_notify(int action, struct device_node *np,
- struct property *prop)
-{
- return 0;
-}
-#endif
-
/**
* __of_add_property - Add a property to a node without lock operations
*/
-static int __of_add_property(struct device_node *np, struct property *prop)
+int __of_add_property(struct device_node *np, struct property *prop)
{
struct property **next;
@@ -1799,22 +1675,49 @@ int of_add_property(struct device_node *np, struct property *prop)
unsigned long flags;
int rc;
- rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
rc = __of_add_property(np, prop);
raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (rc)
- return rc;
- if (of_node_is_attached(np))
+ if (!rc)
__of_add_property_sysfs(np, prop);
+ mutex_unlock(&of_mutex);
+
+ if (!rc)
+ of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL);
+
return rc;
}
+int __of_remove_property(struct device_node *np, struct property *prop)
+{
+ struct property **next;
+
+ for (next = &np->properties; *next; next = &(*next)->next) {
+ if (*next == prop)
+ break;
+ }
+ if (*next == NULL)
+ return -ENODEV;
+
+ /* found the node */
+ *next = prop->next;
+ prop->next = np->deadprops;
+ np->deadprops = prop;
+
+ return 0;
+}
+
+void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+{
+ /* at early boot, bail here and defer setup to of_init() */
+ if (of_kset && of_node_is_attached(np))
+ sysfs_remove_bin_file(&np->kobj, &prop->attr);
+}
+
/**
* of_remove_property - Remove a property from a node.
*
@@ -1825,211 +1728,98 @@ int of_add_property(struct device_node *np, struct property *prop)
*/
int of_remove_property(struct device_node *np, struct property *prop)
{
- struct property **next;
unsigned long flags;
- int found = 0;
int rc;
- rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
- next = &np->properties;
- while (*next) {
- if (*next == prop) {
- /* found the node */
- *next = prop->next;
- prop->next = np->deadprops;
- np->deadprops = prop;
- found = 1;
- break;
- }
- next = &(*next)->next;
- }
+ rc = __of_remove_property(np, prop);
raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (!found)
- return -ENODEV;
+ if (!rc)
+ __of_remove_property_sysfs(np, prop);
- /* at early boot, bail hear and defer setup to of_init() */
- if (!of_kset)
- return 0;
+ mutex_unlock(&of_mutex);
- sysfs_remove_bin_file(&np->kobj, &prop->attr);
+ if (!rc)
+ of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL);
- return 0;
+ return rc;
}
-/*
- * of_update_property - Update a property in a node, if the property does
- * not exist, add it.
- *
- * Note that we don't actually remove it, since we have given out
- * who-knows-how-many pointers to the data using get-property.
- * Instead we just move the property to the "dead properties" list,
- * and add the new property to the property list
- */
-int of_update_property(struct device_node *np, struct property *newprop)
+int __of_update_property(struct device_node *np, struct property *newprop,
+ struct property **oldpropp)
{
struct property **next, *oldprop;
- unsigned long flags;
- int rc;
-
- rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
- if (rc)
- return rc;
- if (!newprop->name)
- return -EINVAL;
+ for (next = &np->properties; *next; next = &(*next)->next) {
+ if (of_prop_cmp((*next)->name, newprop->name) == 0)
+ break;
+ }
+ *oldpropp = oldprop = *next;
- raw_spin_lock_irqsave(&devtree_lock, flags);
- next = &np->properties;
- oldprop = __of_find_property(np, newprop->name, NULL);
- if (!oldprop) {
- /* add the new node */
- rc = __of_add_property(np, newprop);
- } else while (*next) {
+ if (oldprop) {
/* replace the node */
- if (*next == oldprop) {
- newprop->next = oldprop->next;
- *next = newprop;
- oldprop->next = np->deadprops;
- np->deadprops = oldprop;
- break;
- }
- next = &(*next)->next;
+ newprop->next = oldprop->next;
+ *next = newprop;
+ oldprop->next = np->deadprops;
+ np->deadprops = oldprop;
+ } else {
+ /* new node */
+ newprop->next = NULL;
+ *next = newprop;
}
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (rc)
- return rc;
+ return 0;
+}
+
+void __of_update_property_sysfs(struct device_node *np, struct property *newprop,
+ struct property *oldprop)
+{
/* At early boot, bail out and defer setup to of_init() */
if (!of_kset)
- return 0;
+ return;
- /* Update the sysfs attribute */
if (oldprop)
sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
__of_add_property_sysfs(np, newprop);
-
- return 0;
}
-#if defined(CONFIG_OF_DYNAMIC)
/*
- * Support for dynamic device trees.
+ * of_update_property - Update a property in a node, if the property does
+ * not exist, add it.
*
- * On some platforms, the device tree can be manipulated at runtime.
- * The routines in this section support adding, removing and changing
- * device tree nodes.
- */
-
-static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
-
-int of_reconfig_notifier_register(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
-
-int of_reconfig_notifier_unregister(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
-
-int of_reconfig_notify(unsigned long action, void *p)
-{
- int rc;
-
- rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
- return notifier_to_errno(rc);
-}
-
-/**
- * of_attach_node - Plug a device node into the tree and global list.
+ * Note that we don't actually remove it, since we have given out
+ * who-knows-how-many pointers to the data using get-property.
+ * Instead we just move the property to the "dead properties" list,
+ * and add the new property to the property list
*/
-int of_attach_node(struct device_node *np)
+int of_update_property(struct device_node *np, struct property *newprop)
{
+ struct property *oldprop;
unsigned long flags;
int rc;
- rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
- if (rc)
- return rc;
-
- raw_spin_lock_irqsave(&devtree_lock, flags);
- np->sibling = np->parent->child;
- np->allnext = np->parent->allnext;
- np->parent->allnext = np;
- np->parent->child = np;
- of_node_clear_flag(np, OF_DETACHED);
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
-
- of_node_add(np);
- return 0;
-}
-
-/**
- * of_detach_node - "Unplug" a node from the device tree.
- *
- * The caller must hold a reference to the node. The memory associated with
- * the node is not freed until its refcount goes to zero.
- */
-int of_detach_node(struct device_node *np)
-{
- struct device_node *parent;
- unsigned long flags;
- int rc = 0;
+ if (!newprop->name)
+ return -EINVAL;
- rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
+ rc = __of_update_property(np, newprop, &oldprop);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (of_node_check_flag(np, OF_DETACHED)) {
- /* someone already detached it */
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- return rc;
- }
-
- parent = np->parent;
- if (!parent) {
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- return rc;
- }
+ if (!rc)
+ __of_update_property_sysfs(np, newprop, oldprop);
- if (of_allnodes == np)
- of_allnodes = np->allnext;
- else {
- struct device_node *prev;
- for (prev = of_allnodes;
- prev->allnext != np;
- prev = prev->allnext)
- ;
- prev->allnext = np->allnext;
- }
+ mutex_unlock(&of_mutex);
- if (parent->child == np)
- parent->child = np->sibling;
- else {
- struct device_node *prevsib;
- for (prevsib = np->parent->child;
- prevsib->sibling != np;
- prevsib = prevsib->sibling)
- ;
- prevsib->sibling = np->sibling;
- }
-
- of_node_set_flag(np, OF_DETACHED);
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ if (!rc)
+ of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop);
- of_node_remove(np);
return rc;
}
-#endif /* defined(CONFIG_OF_DYNAMIC) */
static void of_alias_add(struct alias_prop *ap, struct device_node *np,
int id, const char *stem, int stem_len)
@@ -2062,9 +1852,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
of_chosen = of_find_node_by_path("/chosen@0");
if (of_chosen) {
+ /* linux,stdout-path and /aliases/stdout are for legacy compatibility */
const char *name = of_get_property(of_chosen, "stdout-path", NULL);
if (!name)
name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (IS_ENABLED(CONFIG_PPC) && !name)
+ name = of_get_property(of_aliases, "stdout", NULL);
if (name)
of_stdout = of_find_node_by_path(name);
}
@@ -2122,7 +1915,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
struct alias_prop *app;
int id = -ENODEV;
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
list_for_each_entry(app, &aliases_lookup, link) {
if (strcmp(app->stem, stem) != 0)
continue;
@@ -2132,7 +1925,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
break;
}
}
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
return id;
}
@@ -2180,20 +1973,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
EXPORT_SYMBOL_GPL(of_prop_next_string);
/**
- * of_device_is_stdout_path - check if a device node matches the
- * linux,stdout-path property
- *
- * Check if this device node matches the linux,stdout-path property
- * in the chosen node. return true if yes, false otherwise.
+ * of_console_check() - Test and setup console for DT setup
+ * @dn - Pointer to device node
+ * @name - Name to use for preferred console without index. ex. "ttyS"
+ * @index - Index to use for preferred console.
+ *
+ * Check if the given device node matches the stdout-path property in the
+ * /chosen node. If it does then register it as the preferred console and return
+ * TRUE. Otherwise return FALSE.
*/
-int of_device_is_stdout_path(struct device_node *dn)
+bool of_console_check(struct device_node *dn, char *name, int index)
{
- if (!of_stdout)
+ if (!dn || dn != of_stdout || console_set_on_cmdline)
return false;
-
- return of_stdout == dn;
+ return add_preferred_console(name, index, NULL);
}
-EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+EXPORT_SYMBOL_GPL(of_console_check);
/**
* of_find_next_cache_node - Find a node's subsidiary cache
diff --git a/drivers/of/device.c b/drivers/of/device.c
index dafb9736ab9..46d6c75c140 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen);
seen = 0;
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
list_for_each_entry(app, &aliases_lookup, link) {
if (dev->of_node == app->np) {
add_uevent_var(env, "OF_ALIAS_%d=%s", seen,
@@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
seen++;
}
}
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
}
int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
new file mode 100644
index 00000000000..54fecc49a1f
--- /dev/null
+++ b/drivers/of/dynamic.c
@@ -0,0 +1,660 @@
+/*
+ * Support for dynamic device trees.
+ *
+ * On some platforms, the device tree can be manipulated at runtime.
+ * The routines in this section support adding, removing and changing
+ * device tree nodes.
+ */
+
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+
+#include "of_private.h"
+
+/**
+ * of_node_get() - Increment refcount of a node
+ * @node: Node to inc refcount, NULL is supported to simplify writing of
+ * callers
+ *
+ * Returns node.
+ */
+struct device_node *of_node_get(struct device_node *node)
+{
+ if (node)
+ kobject_get(&node->kobj);
+ return node;
+}
+EXPORT_SYMBOL(of_node_get);
+
+/**
+ * of_node_put() - Decrement refcount of a node
+ * @node: Node to dec refcount, NULL is supported to simplify writing of
+ * callers
+ */
+void of_node_put(struct device_node *node)
+{
+ if (node)
+ kobject_put(&node->kobj);
+}
+EXPORT_SYMBOL(of_node_put);
+
+void __of_detach_node_sysfs(struct device_node *np)
+{
+ struct property *pp;
+
+ BUG_ON(!of_node_is_initialized(np));
+ if (!of_kset)
+ return;
+
+ /* only remove properties if on sysfs */
+ if (of_node_is_attached(np)) {
+ for_each_property_of_node(np, pp)
+ sysfs_remove_bin_file(&np->kobj, &pp->attr);
+ kobject_del(&np->kobj);
+ }
+
+ /* finally remove the kobj_init ref */
+ of_node_put(np);
+}
+
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+ int rc;
+
+ rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+ return notifier_to_errno(rc);
+}
+
+int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *oldprop)
+{
+ struct of_prop_reconfig pr;
+
+ /* only call notifiers if the node is attached */
+ if (!of_node_is_attached(np))
+ return 0;
+
+ pr.dn = np;
+ pr.prop = prop;
+ pr.old_prop = oldprop;
+ return of_reconfig_notify(action, &pr);
+}
+
+void __of_attach_node(struct device_node *np)
+{
+ const __be32 *phandle;
+ int sz;
+
+ np->name = __of_get_property(np, "name", NULL) ? : "<NULL>";
+ np->type = __of_get_property(np, "device_type", NULL) ? : "<NULL>";
+
+ phandle = __of_get_property(np, "phandle", &sz);
+ if (!phandle)
+ phandle = __of_get_property(np, "linux,phandle", &sz);
+ if (IS_ENABLED(PPC_PSERIES) && !phandle)
+ phandle = __of_get_property(np, "ibm,phandle", &sz);
+ np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
+
+ np->child = NULL;
+ np->sibling = np->parent->child;
+ np->allnext = np->parent->allnext;
+ np->parent->allnext = np;
+ np->parent->child = np;
+ of_node_clear_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_attach_node() - Plug a device node into the tree and global list.
+ */
+int of_attach_node(struct device_node *np)
+{
+ unsigned long flags;
+
+ mutex_lock(&of_mutex);
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ __of_attach_node(np);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ __of_attach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
+
+ of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+
+ return 0;
+}
+
+void __of_detach_node(struct device_node *np)
+{
+ struct device_node *parent;
+
+ if (WARN_ON(of_node_check_flag(np, OF_DETACHED)))
+ return;
+
+ parent = np->parent;
+ if (WARN_ON(!parent))
+ return;
+
+ if (of_allnodes == np)
+ of_allnodes = np->allnext;
+ else {
+ struct device_node *prev;
+ for (prev = of_allnodes;
+ prev->allnext != np;
+ prev = prev->allnext)
+ ;
+ prev->allnext = np->allnext;
+ }
+
+ if (parent->child == np)
+ parent->child = np->sibling;
+ else {
+ struct device_node *prevsib;
+ for (prevsib = np->parent->child;
+ prevsib->sibling != np;
+ prevsib = prevsib->sibling)
+ ;
+ prevsib->sibling = np->sibling;
+ }
+
+ of_node_set_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_detach_node() - "Unplug" a node from the device tree.
+ *
+ * The caller must hold a reference to the node. The memory associated with
+ * the node is not freed until its refcount goes to zero.
+ */
+int of_detach_node(struct device_node *np)
+{
+ unsigned long flags;
+ int rc = 0;
+
+ mutex_lock(&of_mutex);
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ __of_detach_node(np);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ __of_detach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
+
+ of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+
+ return rc;
+}
+
+/**
+ * of_node_release() - release a dynamically allocated node
+ * @kref: kref element of the node to be released
+ *
+ * In of_node_put() this function is passed to kref_put() as the destructor.
+ */
+void of_node_release(struct kobject *kobj)
+{
+ struct device_node *node = kobj_to_device_node(kobj);
+ struct property *prop = node->properties;
+
+ /* We should never be releasing nodes that haven't been detached. */
+ if (!of_node_check_flag(node, OF_DETACHED)) {
+ pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
+ dump_stack();
+ return;
+ }
+
+ if (!of_node_check_flag(node, OF_DYNAMIC))
+ return;
+
+ while (prop) {
+ struct property *next = prop->next;
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+ prop = next;
+
+ if (!prop) {
+ prop = node->deadprops;
+ node->deadprops = NULL;
+ }
+ }
+ kfree(node->full_name);
+ kfree(node->data);
+ kfree(node);
+}
+
+/**
+ * __of_prop_dup - Copy a property dynamically.
+ * @prop: Property to copy
+ * @allocflags: Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Copy a property by dynamically allocating the memory of both the
+ * property stucture and the property name & contents. The property's
+ * flags have the OF_DYNAMIC bit set so that we can differentiate between
+ * dynamically allocated properties and not.
+ * Returns the newly allocated property or NULL on out of memory error.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
+{
+ struct property *new;
+
+ new = kzalloc(sizeof(*new), allocflags);
+ if (!new)
+ return NULL;
+
+ /*
+ * NOTE: There is no check for zero length value.
+ * In case of a boolean property, this will allocate a value
+ * of zero bytes. We do this to work around the use
+ * of of_get_property() calls on boolean values.
+ */
+ new->name = kstrdup(prop->name, allocflags);
+ new->value = kmemdup(prop->value, prop->length, allocflags);
+ new->length = prop->length;
+ if (!new->name || !new->value)
+ goto err_free;
+
+ /* mark the property as dynamic */
+ of_property_set_flag(new, OF_DYNAMIC);
+
+ return new;
+
+ err_free:
+ kfree(new->name);
+ kfree(new->value);
+ kfree(new);
+ return NULL;
+}
+
+/**
+ * __of_node_alloc() - Create an empty device node dynamically.
+ * @full_name: Full name of the new device node
+ * @allocflags: Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Create an empty device tree node, suitable for further modification.
+ * The node data are dynamically allocated and all the node flags
+ * have the OF_DYNAMIC & OF_DETACHED bits set.
+ * Returns the newly allocated node or NULL on out of memory error.
+ */
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
+{
+ struct device_node *node;
+
+ node = kzalloc(sizeof(*node), allocflags);
+ if (!node)
+ return NULL;
+
+ node->full_name = kstrdup(full_name, allocflags);
+ of_node_set_flag(node, OF_DYNAMIC);
+ of_node_set_flag(node, OF_DETACHED);
+ if (!node->full_name)
+ goto err_free;
+
+ of_node_init(node);
+
+ return node;
+
+ err_free:
+ kfree(node->full_name);
+ kfree(node);
+ return NULL;
+}
+
+static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
+{
+ of_node_put(ce->np);
+ list_del(&ce->node);
+ kfree(ce);
+}
+
+#ifdef DEBUG
+static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+ switch (ce->action) {
+ case OF_RECONFIG_ADD_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "ADD_PROPERTY ", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "REMOVE_PROPERTY", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "UPDATE_PROPERTY", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_ATTACH_NODE:
+ pr_debug("%p: %s %s\n",
+ ce, "ATTACH_NODE ", ce->np->full_name);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ pr_debug("%p: %s %s\n",
+ ce, "DETACH_NODE ", ce->np->full_name);
+ break;
+ }
+}
+#else
+static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+ /* empty */
+}
+#endif
+
+static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
+ struct of_changeset_entry *rce)
+{
+ memcpy(rce, ce, sizeof(*rce));
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ rce->action = OF_RECONFIG_DETACH_NODE;
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ rce->action = OF_RECONFIG_ATTACH_NODE;
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ rce->action = OF_RECONFIG_REMOVE_PROPERTY;
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ rce->action = OF_RECONFIG_ADD_PROPERTY;
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ rce->old_prop = ce->prop;
+ rce->prop = ce->old_prop;
+ break;
+ }
+}
+
+static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
+{
+ struct of_changeset_entry ce_inverted;
+ int ret;
+
+ if (revert) {
+ __of_changeset_entry_invert(ce, &ce_inverted);
+ ce = &ce_inverted;
+ }
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ case OF_RECONFIG_DETACH_NODE:
+ ret = of_reconfig_notify(ce->action, ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop);
+ break;
+ default:
+ pr_err("%s: invalid devicetree changeset action: %i\n", __func__,
+ (int)ce->action);
+ return;
+ }
+
+ if (ret)
+ pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name);
+}
+
+static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+{
+ struct property *old_prop, **propp;
+ unsigned long flags;
+ int ret = 0;
+
+ __of_changeset_entry_dump(ce);
+
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ __of_attach_node(ce->np);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ __of_detach_node(ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ /* If the property is in deadprops then it must be removed */
+ for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+ if (*propp == ce->prop) {
+ *propp = ce->prop->next;
+ ce->prop->next = NULL;
+ break;
+ }
+ }
+
+ ret = __of_add_property(ce->np, ce->prop);
+ if (ret) {
+ pr_err("%s: add_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ ret = __of_remove_property(ce->np, ce->prop);
+ if (ret) {
+ pr_err("%s: remove_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ /* If the property is in deadprops then it must be removed */
+ for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+ if (*propp == ce->prop) {
+ *propp = ce->prop->next;
+ ce->prop->next = NULL;
+ break;
+ }
+ }
+
+ ret = __of_update_property(ce->np, ce->prop, &old_prop);
+ if (ret) {
+ pr_err("%s: update_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ if (ret)
+ return ret;
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ __of_attach_node_sysfs(ce->np);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ __of_detach_node_sysfs(ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ /* ignore duplicate names */
+ __of_add_property_sysfs(ce->np, ce->prop);
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ __of_remove_property_sysfs(ce->np, ce->prop);
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop);
+ break;
+ }
+
+ return 0;
+}
+
+static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce)
+{
+ struct of_changeset_entry ce_inverted;
+
+ __of_changeset_entry_invert(ce, &ce_inverted);
+ return __of_changeset_entry_apply(&ce_inverted);
+}
+
+/**
+ * of_changeset_init - Initialize a changeset for use
+ *
+ * @ocs: changeset pointer
+ *
+ * Initialize a changeset structure
+ */
+void of_changeset_init(struct of_changeset *ocs)
+{
+ memset(ocs, 0, sizeof(*ocs));
+ INIT_LIST_HEAD(&ocs->entries);
+}
+
+/**
+ * of_changeset_destroy - Destroy a changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Destroys a changeset. Note that if a changeset is applied,
+ * its changes to the tree cannot be reverted.
+ */
+void of_changeset_destroy(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce, *cen;
+
+ list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
+ __of_changeset_entry_destroy(ce);
+}
+
+/**
+ * of_changeset_apply - Applies a changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Applies a changeset to the live tree.
+ * Any side-effects of live tree state changes are applied here on
+ * sucess, like creation/destruction of devices and side-effects
+ * like creation of sysfs properties and directories.
+ * Returns 0 on success, a negative error value in case of an error.
+ * On error the partially applied effects are reverted.
+ */
+int of_changeset_apply(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce;
+ int ret;
+
+ /* perform the rest of the work */
+ pr_debug("of_changeset: applying...\n");
+ list_for_each_entry(ce, &ocs->entries, node) {
+ ret = __of_changeset_entry_apply(ce);
+ if (ret) {
+ pr_err("%s: Error applying changeset (%d)\n", __func__, ret);
+ list_for_each_entry_continue_reverse(ce, &ocs->entries, node)
+ __of_changeset_entry_revert(ce);
+ return ret;
+ }
+ }
+ pr_debug("of_changeset: applied, emitting notifiers.\n");
+
+ /* drop the global lock while emitting notifiers */
+ mutex_unlock(&of_mutex);
+ list_for_each_entry(ce, &ocs->entries, node)
+ __of_changeset_entry_notify(ce, 0);
+ mutex_lock(&of_mutex);
+ pr_debug("of_changeset: notifiers sent.\n");
+
+ return 0;
+}
+
+/**
+ * of_changeset_revert - Reverts an applied changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Reverts a changeset returning the state of the tree to what it
+ * was before the application.
+ * Any side-effects like creation/destruction of devices and
+ * removal of sysfs properties and directories are applied.
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_revert(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce;
+ int ret;
+
+ pr_debug("of_changeset: reverting...\n");
+ list_for_each_entry_reverse(ce, &ocs->entries, node) {
+ ret = __of_changeset_entry_revert(ce);
+ if (ret) {
+ pr_err("%s: Error reverting changeset (%d)\n", __func__, ret);
+ list_for_each_entry_continue(ce, &ocs->entries, node)
+ __of_changeset_entry_apply(ce);
+ return ret;
+ }
+ }
+ pr_debug("of_changeset: reverted, emitting notifiers.\n");
+
+ /* drop the global lock while emitting notifiers */
+ mutex_unlock(&of_mutex);
+ list_for_each_entry_reverse(ce, &ocs->entries, node)
+ __of_changeset_entry_notify(ce, 1);
+ mutex_lock(&of_mutex);
+ pr_debug("of_changeset: notifiers sent.\n");
+
+ return 0;
+}
+
+/**
+ * of_changeset_action - Perform a changeset action
+ *
+ * @ocs: changeset pointer
+ * @action: action to perform
+ * @np: Pointer to device node
+ * @prop: Pointer to property
+ *
+ * On action being one of:
+ * + OF_RECONFIG_ATTACH_NODE
+ * + OF_RECONFIG_DETACH_NODE,
+ * + OF_RECONFIG_ADD_PROPERTY
+ * + OF_RECONFIG_REMOVE_PROPERTY,
+ * + OF_RECONFIG_UPDATE_PROPERTY
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+ struct device_node *np, struct property *prop)
+{
+ struct of_changeset_entry *ce;
+
+ ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+ if (!ce) {
+ pr_err("%s: Failed to allocate\n", __func__);
+ return -ENOMEM;
+ }
+ /* get a reference to the node */
+ ce->action = action;
+ ce->np = of_node_get(np);
+ ce->prop = prop;
+
+ if (action == OF_RECONFIG_UPDATE_PROPERTY && prop)
+ ce->old_prop = of_find_property(np, prop->name, NULL);
+
+ /* add it to the list */
+ list_add_tail(&ce->node, &ocs->entries);
+ return 0;
+}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9aa012e6ea0..f46a24ffa3f 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -923,24 +923,24 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
}
#ifdef CONFIG_HAVE_MEMBLOCK
+#define MAX_PHYS_ADDR ((phys_addr_t)~0)
+
void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
{
const u64 phys_offset = __pa(PAGE_OFFSET);
base &= PAGE_MASK;
size &= PAGE_MASK;
- if (sizeof(phys_addr_t) < sizeof(u64)) {
- if (base > ULONG_MAX) {
- pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
- base, base + size);
- return;
- }
+ if (base > MAX_PHYS_ADDR) {
+ pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
+ base, base + size);
+ return;
+ }
- if (base + size > ULONG_MAX) {
- pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
- ULONG_MAX, base + size);
- size = ULONG_MAX - base;
- }
+ if (base + size > MAX_PHYS_ADDR) {
+ pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
+ ULONG_MAX, base + size);
+ size = MAX_PHYS_ADDR - base;
}
if (base + size < phys_offset) {
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index ff350c8fa7a..858e0a5d9a1 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -31,6 +31,63 @@ struct alias_prop {
char stem[0];
};
-extern struct mutex of_aliases_mutex;
+extern struct mutex of_mutex;
extern struct list_head aliases_lookup;
+extern struct kset *of_kset;
+
+
+static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
+{
+ return container_of(kobj, struct device_node, kobj);
+}
+
+#if defined(CONFIG_OF_DYNAMIC)
+extern int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *old_prop);
+extern void of_node_release(struct kobject *kobj);
+#else /* CONFIG_OF_DYNAMIC */
+static inline int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *old_prop)
+{
+ return 0;
+}
+#endif /* CONFIG_OF_DYNAMIC */
+
+/**
+ * General utilities for working with live trees.
+ *
+ * All functions with two leading underscores operate
+ * without taking node references, so you either have to
+ * own the devtree lock or work on detached trees only.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
+
+extern const void *__of_get_property(const struct device_node *np,
+ const char *name, int *lenp);
+extern int __of_add_property(struct device_node *np, struct property *prop);
+extern int __of_add_property_sysfs(struct device_node *np,
+ struct property *prop);
+extern int __of_remove_property(struct device_node *np, struct property *prop);
+extern void __of_remove_property_sysfs(struct device_node *np,
+ struct property *prop);
+extern int __of_update_property(struct device_node *np,
+ struct property *newprop, struct property **oldprop);
+extern void __of_update_property_sysfs(struct device_node *np,
+ struct property *newprop, struct property *oldprop);
+
+extern void __of_attach_node(struct device_node *np);
+extern int __of_attach_node_sysfs(struct device_node *np);
+extern void __of_detach_node(struct device_node *np);
+extern void __of_detach_node_sysfs(struct device_node *np);
+
+/* iterators for transactions, used for overlays */
+/* forward iterator */
+#define for_each_transaction_entry(_oft, _te) \
+ list_for_each_entry(_te, &(_oft)->te_list, node)
+
+/* reverse iterator */
+#define for_each_transaction_entry_reverse(_oft, _te) \
+ list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
+
#endif /* _LINUX_OF_PRIVATE_H */
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 632aae86137..59fb12e84e6 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void)
for (i = 0; i < reserved_mem_count; i++) {
struct reserved_mem *rmem = &reserved_mem[i];
unsigned long node = rmem->fdt_node;
+ int len;
+ const __be32 *prop;
int err = 0;
+ prop = of_get_flat_dt_prop(node, "phandle", &len);
+ if (!prop)
+ prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
+ if (prop)
+ rmem->phandle = of_read_number(prop, len/4);
+
if (rmem->size == 0)
err = __reserved_mem_alloc_size(node, rmem->name,
&rmem->base, &rmem->size);
@@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void)
__reserved_mem_init_node(rmem);
}
}
+
+static inline struct reserved_mem *__find_rmem(struct device_node *node)
+{
+ unsigned int i;
+
+ if (!node->phandle)
+ return NULL;
+
+ for (i = 0; i < reserved_mem_count; i++)
+ if (reserved_mem[i].phandle == node->phandle)
+ return &reserved_mem[i];
+ return NULL;
+}
+
+/**
+ * of_reserved_mem_device_init() - assign reserved memory region to given device
+ *
+ * This function assign memory region pointed by "memory-region" device tree
+ * property to the given device.
+ */
+void of_reserved_mem_device_init(struct device *dev)
+{
+ struct reserved_mem *rmem;
+ struct device_node *np;
+
+ np = of_parse_phandle(dev->of_node, "memory-region", 0);
+ if (!np)
+ return;
+
+ rmem = __find_rmem(np);
+ of_node_put(np);
+
+ if (!rmem || !rmem->ops || !rmem->ops->device_init)
+ return;
+
+ rmem->ops->device_init(rmem, dev);
+ dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
+}
+
+/**
+ * of_reserved_mem_device_release() - release reserved memory device structures
+ *
+ * This function releases structures allocated for memory region handling for
+ * the given device.
+ */
+void of_reserved_mem_device_release(struct device *dev)
+{
+ struct reserved_mem *rmem;
+ struct device_node *np;
+
+ np = of_parse_phandle(dev->of_node, "memory-region", 0);
+ if (!np)
+ return;
+
+ rmem = __find_rmem(np);
+ of_node_put(np);
+
+ if (!rmem || !rmem->ops || !rmem->ops->device_release)
+ return;
+
+ rmem->ops->device_release(rmem, dev);
+}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 500436f9be7..0197725e033 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus,
break;
}
}
+ of_node_set_flag(bus, OF_POPULATED_BUS);
return rc;
}
@@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate);
static int of_platform_device_destroy(struct device *dev, void *data)
{
- bool *children_left = data;
-
/* Do not touch devices not populated from the device tree */
- if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) {
- *children_left = true;
+ if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
return 0;
- }
- /* Recurse, but don't touch this device if it has any children left */
- if (of_platform_depopulate(dev) != 0) {
- *children_left = true;
- return 0;
- }
+ /* Recurse for any nodes that were treated as busses */
+ if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS))
+ device_for_each_child(dev, NULL, of_platform_device_destroy);
if (dev->bus == &platform_bus_type)
platform_device_unregister(to_platform_device(dev));
@@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data)
else if (dev->bus == &amba_bustype)
amba_device_unregister(to_amba_device(dev));
#endif
- else {
- *children_left = true;
- return 0;
- }
of_node_clear_flag(dev->of_node, OF_POPULATED);
-
+ of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
return 0;
}
/**
* of_platform_depopulate() - Remove devices populated from device tree
- * @parent: device which childred will be removed
+ * @parent: device which children will be removed
*
* Complementary to of_platform_populate(), this function removes children
* of the given device (and, recurrently, their children) that have been
@@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data)
* Returns 0 when all children devices have been removed or
* -EBUSY when some children remained.
*/
-int of_platform_depopulate(struct device *parent)
+void of_platform_depopulate(struct device *parent)
{
- bool children_left = false;
-
- device_for_each_child(parent, &children_left,
- of_platform_device_destroy);
-
- return children_left ? -EBUSY : 0;
+ device_for_each_child(parent, NULL, of_platform_device_destroy);
}
EXPORT_SYMBOL_GPL(of_platform_depopulate);
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 077314eebb9..d4100266783 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/list.h>
@@ -16,11 +17,17 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include "of_private.h"
+
static struct selftest_results {
int passed;
int failed;
} selftest_results;
+#define NO_OF_NODES 2
+static struct device_node *nodes[NO_OF_NODES];
+static int last_node_index;
+
#define selftest(result, fmt, ...) { \
if (!(result)) { \
selftest_results.failed++; \
@@ -266,6 +273,81 @@ static void __init of_selftest_property_match_string(void)
selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
}
+#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \
+ (p1)->value && (p2)->value && \
+ !memcmp((p1)->value, (p2)->value, (p1)->length) && \
+ !strcmp((p1)->name, (p2)->name))
+static void __init of_selftest_property_copy(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+ struct property p1 = { .name = "p1", .length = 0, .value = "" };
+ struct property p2 = { .name = "p2", .length = 5, .value = "abcd" };
+ struct property *new;
+
+ new = __of_prop_dup(&p1, GFP_KERNEL);
+ selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n");
+ kfree(new->value);
+ kfree(new->name);
+ kfree(new);
+
+ new = __of_prop_dup(&p2, GFP_KERNEL);
+ selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n");
+ kfree(new->value);
+ kfree(new->name);
+ kfree(new);
+#endif
+}
+
+static void __init of_selftest_changeset(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+ struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
+ struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
+ struct property *ppremove;
+ struct device_node *n1, *n2, *n21, *nremove, *parent;
+ struct of_changeset chgset;
+
+ of_changeset_init(&chgset);
+ n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
+ selftest(n1, "testcase setup failure\n");
+ n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
+ selftest(n2, "testcase setup failure\n");
+ n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
+ selftest(n21, "testcase setup failure %p\n", n21);
+ nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
+ selftest(nremove, "testcase setup failure\n");
+ ppadd = __of_prop_dup(&padd, GFP_KERNEL);
+ selftest(ppadd, "testcase setup failure\n");
+ ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL);
+ selftest(ppupdate, "testcase setup failure\n");
+ parent = nremove->parent;
+ n1->parent = parent;
+ n2->parent = parent;
+ n21->parent = n2;
+ n2->child = n21;
+ ppremove = of_find_property(parent, "prop-remove", NULL);
+ selftest(ppremove, "failed to find removal prop");
+
+ of_changeset_init(&chgset);
+ selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n");
+ selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n");
+ selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n");
+ selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n");
+ selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n");
+ selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
+ selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
+ mutex_lock(&of_mutex);
+ selftest(!of_changeset_apply(&chgset), "apply failed\n");
+ mutex_unlock(&of_mutex);
+
+ mutex_lock(&of_mutex);
+ selftest(!of_changeset_revert(&chgset), "revert failed\n");
+ mutex_unlock(&of_mutex);
+
+ of_changeset_destroy(&chgset);
+#endif
+}
+
static void __init of_selftest_parse_interrupts(void)
{
struct device_node *np;
@@ -517,9 +599,156 @@ static void __init of_selftest_platform_populate(void)
}
}
+/**
+ * update_node_properties - adds the properties
+ * of np into dup node (present in live tree) and
+ * updates parent of children of np to dup.
+ *
+ * @np: node already present in live tree
+ * @dup: node present in live tree to be updated
+ */
+static void update_node_properties(struct device_node *np,
+ struct device_node *dup)
+{
+ struct property *prop;
+ struct device_node *child;
+
+ for_each_property_of_node(np, prop)
+ of_add_property(dup, prop);
+
+ for_each_child_of_node(np, child)
+ child->parent = dup;
+}
+
+/**
+ * attach_node_and_children - attaches nodes
+ * and its children to live tree
+ *
+ * @np: Node to attach to live tree
+ */
+static int attach_node_and_children(struct device_node *np)
+{
+ struct device_node *next, *root = np, *dup;
+
+ if (!np) {
+ pr_warn("%s: No tree to attach; not running tests\n",
+ __func__);
+ return -ENODATA;
+ }
+
+
+ /* skip root node */
+ np = np->child;
+ /* storing a copy in temporary node */
+ dup = np;
+
+ while (dup) {
+ nodes[last_node_index++] = dup;
+ dup = dup->sibling;
+ }
+ dup = NULL;
+
+ while (np) {
+ next = np->allnext;
+ dup = of_find_node_by_path(np->full_name);
+ if (dup)
+ update_node_properties(np, dup);
+ else {
+ np->child = NULL;
+ if (np->parent == root)
+ np->parent = of_allnodes;
+ of_attach_node(np);
+ }
+ np = next;
+ }
+
+ return 0;
+}
+
+/**
+ * selftest_data_add - Reads, copies data from
+ * linked tree and attaches it to the live tree
+ */
+static int __init selftest_data_add(void)
+{
+ void *selftest_data;
+ struct device_node *selftest_data_node;
+ extern uint8_t __dtb_testcases_begin[];
+ extern uint8_t __dtb_testcases_end[];
+ const int size = __dtb_testcases_end - __dtb_testcases_begin;
+
+ if (!size || !of_allnodes) {
+ pr_warn("%s: No testcase data to attach; not running tests\n",
+ __func__);
+ return -ENODATA;
+ }
+
+ /* creating copy */
+ selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL);
+
+ if (!selftest_data) {
+ pr_warn("%s: Failed to allocate memory for selftest_data; "
+ "not running tests\n", __func__);
+ return -ENOMEM;
+ }
+ of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
+
+ /* attach the sub-tree to live tree */
+ return attach_node_and_children(selftest_data_node);
+}
+
+/**
+ * detach_node_and_children - detaches node
+ * and its children from live tree
+ *
+ * @np: Node to detach from live tree
+ */
+static void detach_node_and_children(struct device_node *np)
+{
+ while (np->child)
+ detach_node_and_children(np->child);
+
+ while (np->sibling)
+ detach_node_and_children(np->sibling);
+
+ of_detach_node(np);
+}
+
+/**
+ * selftest_data_remove - removes the selftest data
+ * nodes from the live tree
+ */
+static void selftest_data_remove(void)
+{
+ struct device_node *np;
+ struct property *prop;
+
+ while (last_node_index >= 0) {
+ if (nodes[last_node_index]) {
+ np = of_find_node_by_path(nodes[last_node_index]->full_name);
+ if (strcmp(np->full_name, "/aliases") != 0) {
+ detach_node_and_children(np->child);
+ of_detach_node(np);
+ } else {
+ for_each_property_of_node(np, prop) {
+ if (strcmp(prop->name, "testcase-alias") == 0)
+ of_remove_property(np, prop);
+ }
+ }
+ }
+ last_node_index--;
+ }
+}
+
static int __init of_selftest(void)
{
struct device_node *np;
+ int res;
+
+ /* adding data for selftest */
+ res = selftest_data_add();
+ if (res)
+ return res;
np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
if (!np) {
@@ -533,12 +762,18 @@ static int __init of_selftest(void)
of_selftest_dynamic();
of_selftest_parse_phandle_with_args();
of_selftest_property_match_string();
+ of_selftest_property_copy();
+ of_selftest_changeset();
of_selftest_parse_interrupts();
of_selftest_parse_interrupts_extended();
of_selftest_match_node();
of_selftest_platform_populate();
pr_info("end of selftest - %i passed, %i failed\n",
selftest_results.passed, selftest_results.failed);
+
+ /* removing selftest data from live tree */
+ selftest_data_remove();
+
return 0;
}
late_initcall(of_selftest);
diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts
new file mode 100644
index 00000000000..219ef9324e9
--- /dev/null
+++ b/drivers/of/testcase-data/testcases.dts
@@ -0,0 +1,15 @@
+/dts-v1/;
+/ {
+ testcase-data {
+ changeset {
+ prop-update = "hello";
+ prop-remove = "world";
+ node-remove {
+ };
+ };
+ };
+};
+#include "tests-phandle.dtsi"
+#include "tests-interrupts.dtsi"
+#include "tests-match.dtsi"
+#include "tests-platform.dtsi"
diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi
deleted file mode 100644
index 6d8d980ac85..00000000000
--- a/drivers/of/testcase-data/testcases.dtsi
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "tests-phandle.dtsi"
-#include "tests-interrupts.dtsi"
-#include "tests-match.dtsi"
-#include "tests-platform.dtsi"
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 93aa29f6d39..f2945fa73d4 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -375,11 +375,11 @@ static void __exit cleanup_slots(void)
static int __init rpaphp_init(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
- while ((dn = of_find_node_by_name(dn, "pci")))
+ for_each_node_by_name(dn, "pci")
rpaphp_add_slot(dn);
return 0;
diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig
index 6bbc36fbd6e..e4603985dce 100644
--- a/drivers/scsi/cxgbi/cxgb3i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig
@@ -1,6 +1,6 @@
config SCSI_CXGB3_ISCSI
tristate "Chelsio T3 iSCSI support"
- depends on PCI && INET
+ depends on PCI && INET && (IPV6 || IPV6=n)
select NETDEVICES
select ETHERNET
select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 16b2c7d2661..8c4e423037b 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,6 +1,6 @@
config SCSI_CXGB4_ISCSI
tristate "Chelsio T4 iSCSI support"
- depends on PCI && INET
+ depends on PCI && INET && (IPV6 || IPV6=n)
select NETDEVICES
select ETHERNET
select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 43fea2219f8..ae45bd99bae 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -472,7 +472,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
if (delay > 0)
queue_delayed_work(system_long_wq, &rport->reconnect_work,
1UL * delay * HZ);
- if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
+ if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) &&
+ srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
rport->state);
scsi_target_block(&shost->shost_gendev);
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 0419b69e270..4f485e88f60 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc)
*
* The byte channel to be used for the console is specified via a "stdout"
* property in the /chosen node.
- *
- * For compatible with legacy device trees, we also look for a "stdout" alias.
*/
static int find_console_handle(void)
{
- struct device_node *np, *np2;
+ struct device_node *np = of_stdout;
const char *sprop = NULL;
const uint32_t *iprop;
- np = of_find_node_by_path("/chosen");
- if (np)
- sprop = of_get_property(np, "stdout-path", NULL);
-
- if (!np || !sprop) {
- of_node_put(np);
- np = of_find_node_by_name(NULL, "aliases");
- if (np)
- sprop = of_get_property(np, "stdout", NULL);
- }
-
- if (!sprop) {
- of_node_put(np);
- return 0;
- }
-
/* We don't care what the aliased node is actually called. We only
* care if it's compatible with "epapr,hv-byte-channel", because that
- * indicates that it's a byte channel node. We use a temporary
- * variable, 'np2', because we can't release 'np' until we're done with
- * 'sprop'.
+ * indicates that it's a byte channel node.
*/
- np2 = of_find_node_by_path(sprop);
- of_node_put(np);
- np = np2;
- if (!np) {
- pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
- return 0;
- }
-
- /* Is it a byte channel? */
- if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
- of_node_put(np);
+ if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel"))
return 0;
- }
stdout_irq = irq_of_parse_and_map(np, 0);
if (stdout_irq == NO_IRQ) {
- pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop);
- of_node_put(np);
+ pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name);
return 0;
}
@@ -167,12 +135,9 @@ static int find_console_handle(void)
if (!iprop) {
pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
np->name);
- of_node_put(np);
return 0;
}
stdout_bc = be32_to_cpu(*iprop);
-
- of_node_put(np);
return 1;
}
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index a585079b4b3..a2cc5f834c6 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -342,22 +342,13 @@ static void udbg_init_opal_common(void)
void __init hvc_opal_init_early(void)
{
- struct device_node *stdout_node = NULL;
+ struct device_node *stdout_node = of_node_get(of_stdout);
const __be32 *termno;
- const char *name = NULL;
const struct hv_ops *ops;
u32 index;
- /* find the boot console from /chosen/stdout */
- if (of_chosen)
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name) {
- stdout_node = of_find_node_by_path(name);
- if (!stdout_node) {
- pr_err("hvc_opal: Failed to locate default console!\n");
- return;
- }
- } else {
+ /* If the console wasn't in /chosen, try /ibm,opal */
+ if (!stdout_node) {
struct device_node *opal, *np;
/* Current OPAL takeover doesn't provide the stdout
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index b594abfbf21..5618b5fc750 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -404,42 +404,35 @@ module_exit(hvc_vio_exit);
void __init hvc_vio_init_early(void)
{
- struct device_node *stdout_node;
const __be32 *termno;
const char *name;
const struct hv_ops *ops;
/* find the boot console from /chosen/stdout */
- if (!of_chosen)
+ if (!of_stdout)
return;
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name == NULL)
- return;
- stdout_node = of_find_node_by_path(name);
- if (!stdout_node)
- return;
- name = of_get_property(stdout_node, "name", NULL);
+ name = of_get_property(of_stdout, "name", NULL);
if (!name) {
printk(KERN_WARNING "stdout node missing 'name' property!\n");
- goto out;
+ return;
}
/* Check if it's a virtual terminal */
if (strncmp(name, "vty", 3) != 0)
- goto out;
- termno = of_get_property(stdout_node, "reg", NULL);
+ return;
+ termno = of_get_property(of_stdout, "reg", NULL);
if (termno == NULL)
- goto out;
+ return;
hvterm_priv0.termno = of_read_number(termno, 1);
spin_lock_init(&hvterm_priv0.buf_lock);
hvterm_privs[0] = &hvterm_priv0;
/* Check the protocol */
- if (of_device_is_compatible(stdout_node, "hvterm1")) {
+ if (of_device_is_compatible(of_stdout, "hvterm1")) {
hvterm_priv0.proto = HV_PROTOCOL_RAW;
ops = &hvterm_raw_ops;
}
- else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
+ else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) {
hvterm_priv0.proto = HV_PROTOCOL_HVSI;
ops = &hvterm_hvsi_ops;
hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
@@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void)
/* HVSI, perform the handshake now */
hvsilib_establish(&hvterm_priv0.hvsi);
} else
- goto out;
+ return;
udbg_putc = udbg_hvc_putc;
udbg_getc = udbg_hvc_getc;
udbg_getc_poll = udbg_hvc_getc_poll;
@@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void)
* backend for HVSI, only do udbg
*/
if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
- goto out;
+ return;
#endif
/* Check whether the user has requested a different console. */
if (!strstr(cmd_line, "console="))
add_preferred_console("hvc", 0, NULL);
hvc_instantiate(0, 0, ops);
-out:
- of_node_put(stdout_node);
}
/* call this from early_init() for a working debug console on
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index f7ad5b90305..abbfedb8490 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -1653,8 +1653,7 @@ static int __init pmz_probe(void)
/*
* Find all escc chips in the system
*/
- node_p = of_find_node_by_name(NULL, "escc");
- while (node_p) {
+ for_each_node_by_name(node_p, "escc") {
/*
* First get channel A/B node pointers
*
@@ -1672,7 +1671,7 @@ static int __init pmz_probe(void)
of_node_put(node_b);
printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n",
(!node_a) ? 'a' : 'b', node_p->full_name);
- goto next;
+ continue;
}
/*
@@ -1699,11 +1698,9 @@ static int __init pmz_probe(void)
of_node_put(node_b);
memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port));
memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port));
- goto next;
+ continue;
}
count += 2;
-next:
- node_p = of_find_node_by_name(node_p, "escc");
}
pmz_ports_count = count;
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 8bb19da0163..29a7be47389 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/console.h>
+#include <linux/of.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
@@ -2611,6 +2612,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
spin_lock_init(&uport->lock);
lockdep_set_class(&uport->lock, &port_lock_key);
}
+ if (uport->cons && uport->dev)
+ of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
uart_configure_port(drv, state, uport);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204b921..d8c57636b9c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
depends on VFIO && SPAPR_TCE_IOMMU
default n
+config VFIO_SPAPR_EEH
+ tristate
+ depends on EEH && VFIO_IOMMU_SPAPR_TCE
+ default n
+
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if X86
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+ select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc75e8..0b035b12600 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
obj-$(CONFIG_VFIO) += vfio.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f36e3..f7825332a32 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
"Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
+static DEFINE_MUTEX(driver_lock);
+
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
+
static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
@@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
u16 cmd;
u8 msix_pos;
+ /* Don't allow our initial saved state to include busmaster */
+ pci_clear_master(pdev);
+
ret = pci_enable_device(pdev);
if (ret)
return ret;
@@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
struct pci_dev *pdev = vdev->pdev;
int bar;
- pci_disable_device(pdev);
+ /* Stop the device from further DMA */
+ pci_clear_master(pdev);
vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->barmap[bar] = NULL;
}
+ vdev->needs_reset = true;
+
/*
* If we have saved state, restore it. If we can reset the device,
* even better. Resetting with current state seems better than
@@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
__func__, dev_name(&pdev->dev));
if (!vdev->reset_works)
- return;
+ goto out;
pci_save_state(pdev);
}
@@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
if (ret)
pr_warn("%s: Failed to reset device %s (%d)\n",
__func__, dev_name(&pdev->dev), ret);
+ else
+ vdev->needs_reset = false;
}
pci_restore_state(pdev);
+out:
+ pci_disable_device(pdev);
+
+ vfio_pci_try_bus_reset(vdev);
}
static void vfio_pci_release(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
- if (atomic_dec_and_test(&vdev->refcnt)) {
+ mutex_lock(&driver_lock);
+
+ if (!(--vdev->refcnt)) {
vfio_spapr_pci_eeh_release(vdev->pdev);
vfio_pci_disable(vdev);
}
+ mutex_unlock(&driver_lock);
+
module_put(THIS_MODULE);
}
static int vfio_pci_open(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
- int ret;
+ int ret = 0;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- if (atomic_inc_return(&vdev->refcnt) == 1) {
+ mutex_lock(&driver_lock);
+
+ if (!vdev->refcnt) {
ret = vfio_pci_enable(vdev);
if (ret)
goto error;
- ret = vfio_spapr_pci_eeh_open(vdev->pdev);
- if (ret) {
- vfio_pci_disable(vdev);
- goto error;
- }
+ vfio_spapr_pci_eeh_open(vdev->pdev);
}
-
- return 0;
+ vdev->refcnt++;
error:
- module_put(THIS_MODULE);
+ mutex_unlock(&driver_lock);
+ if (ret)
+ module_put(THIS_MODULE);
return ret;
}
@@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
spin_lock_init(&vdev->irqlock);
- atomic_set(&vdev->refcnt, 0);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
@@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev)
{
struct vfio_pci_device *vdev;
+ mutex_lock(&driver_lock);
+
vdev = vfio_del_group_dev(&pdev->dev);
- if (!vdev)
- return;
+ if (vdev) {
+ iommu_group_put(pdev->dev.iommu_group);
+ kfree(vdev);
+ }
- iommu_group_put(pdev->dev.iommu_group);
- kfree(vdev);
+ mutex_unlock(&driver_lock);
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = {
.err_handler = &vfio_err_handlers,
};
+/*
+ * Test whether a reset is necessary and possible. We mark devices as
+ * needs_reset when they are released, but don't have a function-local reset
+ * available. If any of these exist in the affected devices, we want to do
+ * a bus/slot reset. We also need all of the affected devices to be unused,
+ * so we abort if any device has a non-zero refcnt. driver_lock prevents a
+ * device from being opened during the scan or unbound from vfio-pci.
+ */
+static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
+{
+ bool *needs_reset = data;
+ struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+ int ret = -EBUSY;
+
+ if (pci_drv == &vfio_pci_driver) {
+ struct vfio_device *device;
+ struct vfio_pci_device *vdev;
+
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return ret;
+
+ vdev = vfio_device_data(device);
+ if (vdev) {
+ if (vdev->needs_reset)
+ *needs_reset = true;
+
+ if (!vdev->refcnt)
+ ret = 0;
+ }
+
+ vfio_device_put(device);
+ }
+
+ /*
+ * TODO: vfio-core considers groups to be viable even if some devices
+ * are attached to known drivers, like pci-stub or pcieport. We can't
+ * freeze devices from being unbound to those drivers like we can
+ * here though, so it would be racy to test for them. We also can't
+ * use device_lock() to prevent changes as that would interfere with
+ * PCI-core taking device_lock during bus reset. For now, we require
+ * devices to be bound to vfio-pci to get a bus/slot reset on release.
+ */
+
+ return ret;
+}
+
+/* Clear needs_reset on all affected devices after successful bus/slot reset */
+static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
+{
+ struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+
+ if (pci_drv == &vfio_pci_driver) {
+ struct vfio_device *device;
+ struct vfio_pci_device *vdev;
+
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return 0;
+
+ vdev = vfio_device_data(device);
+ if (vdev)
+ vdev->needs_reset = false;
+
+ vfio_device_put(device);
+ }
+
+ return 0;
+}
+
+/*
+ * Attempt to do a bus/slot reset if there are devices affected by a reset for
+ * this device that are needs_reset and all of the affected devices are unused
+ * (!refcnt). Callers of this function are required to hold driver_lock such
+ * that devices can not be unbound from vfio-pci or opened by a user while we
+ * test for and perform a bus/slot reset.
+ */
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
+{
+ bool needs_reset = false, slot = false;
+ int ret;
+
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return;
+
+ if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_test_bus_reset,
+ &needs_reset, slot) || !needs_reset)
+ return;
+
+ if (slot)
+ ret = pci_try_reset_slot(vdev->pdev->slot);
+ else
+ ret = pci_try_reset_bus(vdev->pdev->bus);
+
+ if (ret)
+ return;
+
+ vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_clear_needs_reset, NULL, slot);
+}
+
static void __exit vfio_pci_cleanup(void)
{
pci_unregister_driver(&vfio_pci_driver);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9c6d5d0f3b0..671c17a6e6d 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -54,8 +54,9 @@ struct vfio_pci_device {
bool extended_caps;
bool bardirty;
bool has_vga;
+ bool needs_reset;
struct pci_saved_state *pci_saved_state;
- atomic_t refcnt;
+ int refcnt;
struct eventfd_ctx *err_trigger;
};
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4ce143..86dfceb9201 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,20 +9,27 @@
* published by the Free Software Foundation.
*/
+#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <asm/eeh.h>
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC "VFIO IOMMU SPAPR EEH"
+
/* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
{
- return eeh_dev_open(pdev);
+ eeh_dev_open(pdev);
}
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
{
eeh_dev_release(pdev);
}
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd, unsigned long arg)
@@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret;
}
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d2633ee099d..b39e5000ff5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -308,6 +308,7 @@ struct bio_integrity_payload {
unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */
+ unsigned short bip_max_vcnt; /* integrity bio_vec slots */
unsigned bip_owns_buf:1; /* should free bip_buf */
struct work_struct bip_work; /* I/O completion */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8699bcf5f09..518b46555b8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -21,6 +21,7 @@
#include <linux/bsg.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
+#include <linux/percpu-refcount.h>
#include <asm/scatterlist.h>
@@ -470,6 +471,7 @@ struct request_queue {
struct mutex sysfs_lock;
int bypass_depth;
+ int mq_freeze_depth;
#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;
@@ -483,7 +485,7 @@ struct request_queue {
#endif
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
- struct percpu_counter mq_usage_counter;
+ struct percpu_ref mq_usage_counter;
struct list_head all_q_node;
struct blk_mq_tag_set *tag_set;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 3dbe9bd57a0..debb70d4054 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
#endif
extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.3"
+#define REL_VERSION "8.4.5"
#define API_VERSION 1
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101
@@ -245,7 +245,7 @@ enum drbd_disk_state {
D_DISKLESS,
D_ATTACHING, /* In the process of reading the meta-data */
D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */
- /* when >= D_FAILED it is legal to access mdev->bc */
+ /* when >= D_FAILED it is legal to access mdev->ldev */
D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */
D_INCONSISTENT,
D_OUTDATED,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 4193f5f2636..7b131ed8f9c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -171,6 +171,10 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
__flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
/* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
+ /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */
+ /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */
+ __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF)
+ __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF)
)
GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 17e50bb0052..8ac8c5d9a3a 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -214,6 +214,7 @@
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
#define DRBD_ALWAYS_ASBP_DEF 0
#define DRBD_USE_RLE_DEF 1
+#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0
#define DRBD_AL_STRIPES_MIN 1
#define DRBD_AL_STRIPES_MAX 1024
@@ -224,4 +225,9 @@
#define DRBD_AL_STRIPE_SIZE_MAX 16777216
#define DRBD_AL_STRIPE_SIZE_DEF 32
#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
+
+#define DRBD_SOCKET_CHECK_TIMEO_MIN 0
+#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX
+#define DRBD_SOCKET_CHECK_TIMEO_DEF 0
+#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1'
#endif
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index c8450366c13..379c02648ab 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -116,6 +116,7 @@ enum {
/* special QP and management commands */
MLX4_CMD_CONF_SPECIAL_QP = 0x23,
MLX4_CMD_MAD_IFC = 0x24,
+ MLX4_CMD_MAD_DEMUX = 0x203,
/* multicast commands */
MLX4_CMD_READ_MCG = 0x25,
@@ -186,6 +187,12 @@ enum {
};
enum {
+ MLX4_CMD_MAD_DEMUX_CONFIG = 0,
+ MLX4_CMD_MAD_DEMUX_QUERY_STATE = 1,
+ MLX4_CMD_MAD_DEMUX_QUERY_RESTR = 2, /* Query mad demux restrictions */
+};
+
+enum {
MLX4_CMD_WRAPPED,
MLX4_CMD_NATIVE
};
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e15b1544ea8..071f6b23460 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -183,6 +183,7 @@ enum {
MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8,
MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9,
MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10,
+ MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11,
};
enum {
@@ -273,6 +274,7 @@ enum {
MLX4_PERM_REMOTE_WRITE = 1 << 13,
MLX4_PERM_ATOMIC = 1 << 14,
MLX4_PERM_BIND_MW = 1 << 15,
+ MLX4_PERM_MASK = 0xFC00
};
enum {
@@ -1254,6 +1256,21 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
int enable);
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry ***mpt_entry);
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry **mpt_entry);
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+ u32 pdn);
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry *mpt_entry,
+ u32 access);
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry **mpt_entry);
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+ u64 iova, u64 size, int npages,
+ int page_shift, struct mlx4_mpt_entry *mpt_entry);
/* Returns true if running in low memory profile (kdump kernel) */
static inline bool mlx4_low_memory_profile(void)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index babaea93bca..29ce014ab42 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -213,6 +213,8 @@ struct dw_mci_dma_ops {
#define DW_MCI_QUIRK_HIGHSPEED BIT(2)
/* Unreliable card detection */
#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3)
+/* No write protect */
+#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4)
/* Slot level quirks */
/* This slot has no write protect */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 08abe994188..09ebe57d5ce 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -104,9 +104,6 @@ struct sdhci_host {
const struct sdhci_ops *ops; /* Low level hw interface */
- struct regulator *vmmc; /* Power regulator (vmmc) */
- struct regulator *vqmmc; /* Signaling regulator (vccq) */
-
/* Internal data */
struct mmc_host *mmc; /* MMC structure */
u64 dma_mask; /* custom DMA mask */
diff --git a/include/linux/of.h b/include/linux/of.h
index 196b34c1ef4..6c4363b8ddc 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -74,8 +74,6 @@ struct of_phandle_args {
uint32_t args[MAX_PHANDLE_ARGS];
};
-extern int of_node_add(struct device_node *node);
-
/* initialize a node */
extern struct kobj_type of_node_ktype;
static inline void of_node_init(struct device_node *node)
@@ -113,6 +111,7 @@ static inline void of_node_put(struct device_node *node) { }
extern struct device_node *of_allnodes;
extern struct device_node *of_chosen;
extern struct device_node *of_aliases;
+extern struct device_node *of_stdout;
extern raw_spinlock_t devtree_lock;
static inline bool of_have_populated_dt(void)
@@ -204,6 +203,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */
#define OF_DETACHED 2 /* node has been detached from the device tree */
#define OF_POPULATED 3 /* device already created for the node */
+#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */
#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
@@ -322,6 +322,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop);
struct of_prop_reconfig {
struct device_node *dn;
struct property *prop;
+ struct property *old_prop;
};
extern int of_reconfig_notifier_register(struct notifier_block *);
@@ -352,7 +353,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
*/
const char *of_prop_next_string(struct property *prop, const char *cur);
-int of_device_is_stdout_path(struct device_node *dn);
+bool of_console_check(struct device_node *dn, char *name, int index);
#else /* CONFIG_OF */
@@ -564,9 +565,9 @@ static inline int of_machine_is_compatible(const char *compat)
return 0;
}
-static inline int of_device_is_stdout_path(struct device_node *dn)
+static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
{
- return 0;
+ return false;
}
static inline const __be32 *of_prop_next_u32(struct property *prop,
@@ -786,4 +787,80 @@ typedef void (*of_init_fn_1)(struct device_node *);
#define OF_DECLARE_2(table, name, compat, fn) \
_OF_DECLARE(table, name, compat, fn, of_init_fn_2)
+/**
+ * struct of_changeset_entry - Holds a changeset entry
+ *
+ * @node: list_head for the log list
+ * @action: notifier action
+ * @np: pointer to the device node affected
+ * @prop: pointer to the property affected
+ * @old_prop: hold a pointer to the original property
+ *
+ * Every modification of the device tree during a changeset
+ * is held in a list of of_changeset_entry structures.
+ * That way we can recover from a partial application, or we can
+ * revert the changeset
+ */
+struct of_changeset_entry {
+ struct list_head node;
+ unsigned long action;
+ struct device_node *np;
+ struct property *prop;
+ struct property *old_prop;
+};
+
+/**
+ * struct of_changeset - changeset tracker structure
+ *
+ * @entries: list_head for the changeset entries
+ *
+ * changesets are a convenient way to apply bulk changes to the
+ * live tree. In case of an error, changes are rolled-back.
+ * changesets live on after initial application, and if not
+ * destroyed after use, they can be reverted in one single call.
+ */
+struct of_changeset {
+ struct list_head entries;
+};
+
+#ifdef CONFIG_OF_DYNAMIC
+extern void of_changeset_init(struct of_changeset *ocs);
+extern void of_changeset_destroy(struct of_changeset *ocs);
+extern int of_changeset_apply(struct of_changeset *ocs);
+extern int of_changeset_revert(struct of_changeset *ocs);
+extern int of_changeset_action(struct of_changeset *ocs,
+ unsigned long action, struct device_node *np,
+ struct property *prop);
+
+static inline int of_changeset_attach_node(struct of_changeset *ocs,
+ struct device_node *np)
+{
+ return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL);
+}
+
+static inline int of_changeset_detach_node(struct of_changeset *ocs,
+ struct device_node *np)
+{
+ return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL);
+}
+
+static inline int of_changeset_add_property(struct of_changeset *ocs,
+ struct device_node *np, struct property *prop)
+{
+ return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop);
+}
+
+static inline int of_changeset_remove_property(struct of_changeset *ocs,
+ struct device_node *np, struct property *prop)
+{
+ return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop);
+}
+
+static inline int of_changeset_update_property(struct of_changeset *ocs,
+ struct device_node *np, struct property *prop)
+{
+ return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
+}
+#endif
+
#endif /* _LINUX_OF_H */
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index d96e1badbee..c2b0627a231 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root,
const struct of_device_id *matches,
const struct of_dev_auxdata *lookup,
struct device *parent);
-extern int of_platform_depopulate(struct device *parent);
+extern void of_platform_depopulate(struct device *parent);
#else
static inline int of_platform_populate(struct device_node *root,
const struct of_device_id *matches,
@@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root,
{
return -ENODEV;
}
-static inline int of_platform_depopulate(struct device *parent)
-{
- return -ENODEV;
-}
+static inline void of_platform_depopulate(struct device *parent) { }
#endif
#endif /* _LINUX_OF_PLATFORM_H */
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 4669ddfdd5a..5b5efae0913 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -8,6 +8,7 @@ struct reserved_mem_ops;
struct reserved_mem {
const char *name;
unsigned long fdt_node;
+ unsigned long phandle;
const struct reserved_mem_ops *ops;
phys_addr_t base;
phys_addr_t size;
@@ -27,10 +28,16 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
#ifdef CONFIG_OF_RESERVED_MEM
+void of_reserved_mem_device_init(struct device *dev);
+void of_reserved_mem_device_release(struct device *dev);
+
void fdt_init_reserved_mem(void);
void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
phys_addr_t base, phys_addr_t size);
#else
+static inline void of_reserved_mem_device_init(struct device *dev) { }
+static inline void of_reserved_mem_device_release(struct device *pdev) { }
+
static inline void fdt_init_reserved_mem(void) { }
static inline void fdt_reserved_mem_save_node(unsigned long node,
const char *uname, phys_addr_t base, phys_addr_t size) { }
diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h
index 2bf1b30cb5d..51e70cf25cb 100644
--- a/include/linux/platform_data/mmc-omap.h
+++ b/include/linux/platform_data/mmc-omap.h
@@ -28,6 +28,7 @@
*/
#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT BIT(0)
#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ BIT(1)
+#define OMAP_HSMMC_SWAKEUP_MISSING BIT(2)
struct mmc_card;
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 0990997a530..d78125f73ac 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -10,6 +10,9 @@
extern const char linux_banner[];
extern const char linux_proc_banner[];
+extern char *log_buf_addr_get(void);
+extern u32 log_buf_len_get(void);
+
static inline int printk_get_level(const char *buffer)
{
if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9cda293c867..36826c0166c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -21,7 +21,7 @@
#include <linux/rculist.h>
struct rhash_head {
- struct rhash_head *next;
+ struct rhash_head __rcu *next;
};
#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL)
@@ -97,7 +97,7 @@ u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr);
void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t);
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t);
void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
- struct rhash_head **pprev, gfp_t flags);
+ struct rhash_head __rcu **pprev, gfp_t flags);
bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
@@ -117,18 +117,12 @@ void rhashtable_destroy(const struct rhashtable *ht);
#define rht_dereference_rcu(p, ht) \
rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
-/* Internal, use rht_obj() instead */
#define rht_entry(ptr, type, member) container_of(ptr, type, member)
#define rht_entry_safe(ptr, type, member) \
({ \
typeof(ptr) __ptr = (ptr); \
__ptr ? rht_entry(__ptr, type, member) : NULL; \
})
-#define rht_entry_safe_rcu(ptr, type, member) \
-({ \
- typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
- __ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \
-})
#define rht_next_entry_safe(pos, ht, member) \
({ \
@@ -205,9 +199,10 @@ void rhashtable_destroy(const struct rhashtable *ht);
* traversal is guarded by rcu_read_lock().
*/
#define rht_for_each_entry_rcu(pos, head, member) \
- for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \
+ for (pos = rht_entry_safe(rcu_dereference_raw(head), \
+ typeof(*(pos)), member); \
pos; \
- pos = rht_entry_safe_rcu((pos)->member.next, \
- typeof(*(pos)), member))
+ pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \
+ typeof(*(pos)), member))
#endif /* _LINUX_RHASHTABLE_H */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 25a0fbd4b99..d3204115f15 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,16 +98,16 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
extern long vfio_external_check_extension(struct vfio_group *group,
unsigned long arg);
+struct pci_dev;
#ifdef CONFIG_EEH
-extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd,
unsigned long arg);
#else
-static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
{
- return 0;
}
static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7a431388756..5fbe6568c3c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops {
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
int (*bind_conflict)(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax);
+ void (*mtu_reduced)(struct sock *sk);
};
/** inet_connection_sock - INET connection oriented sock
diff --git a/include/net/sock.h b/include/net/sock.h
index 38805fa02e4..7f2ab72f321 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -987,7 +987,6 @@ struct proto {
struct sk_buff *skb);
void (*release_cb)(struct sock *sk);
- void (*mtu_reduced)(struct sock *sk);
/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dafa1cbc149..590e01a476a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -417,7 +417,7 @@ void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
- bool paws_check);
+ bool paws_check, bool timestamps);
bool tcp_remember_stamp(struct sock *sk);
bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
@@ -448,6 +448,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
*/
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+void tcp_v4_mtu_reduced(struct sock *sk);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(struct sock *sk,
struct request_sock *req,
@@ -705,8 +706,10 @@ struct tcp_skb_cb {
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
#define TCPCB_LOST 0x04 /* SKB is lost */
#define TCPCB_TAGBITS 0x07 /* All tag bits */
+#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */
#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
-#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
+#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
+ TCPCB_REPAIRED)
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
/* 1 byte hole */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 3d81b90cc31..9bb99e983f5 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <rdma/ib_verbs.h>
+#include <uapi/rdma/ib_user_mad.h>
/* Management base version */
#define IB_MGMT_BASE_VERSION 1
@@ -355,9 +356,13 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
* @hi_tid: Access layer assigned transaction ID for this client.
* Unsolicited MADs sent by this client will have the upper 32-bits
* of their TID set to this value.
+ * @flags: registration flags
* @port_num: Port number on which QP is registered
* @rmpp_version: If set, indicates the RMPP version used by this agent.
*/
+enum {
+ IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP,
+};
struct ib_mad_agent {
struct ib_device *device;
struct ib_qp *qp;
@@ -367,6 +372,7 @@ struct ib_mad_agent {
ib_mad_snoop_handler snoop_handler;
void *context;
u32 hi_tid;
+ u32 flags;
u8 port_num;
u8 rmpp_version;
};
@@ -426,6 +432,7 @@ struct ib_mad_recv_wc {
* in the range from 0x30 to 0x4f. Otherwise not used.
* @method_mask: The caller will receive unsolicited MADs for any method
* where @method_mask = 1.
+ *
*/
struct ib_mad_reg_req {
u8 mgmt_class;
@@ -451,6 +458,7 @@ struct ib_mad_reg_req {
* @recv_handler: The completion callback routine invoked for a received
* MAD.
* @context: User specified context associated with the registration.
+ * @registration_flags: Registration flags to set for this agent
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 port_num,
@@ -459,7 +467,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
- void *context);
+ void *context,
+ u32 registration_flags);
enum ib_mad_snoop_flags {
/*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
@@ -661,4 +670,11 @@ void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
*/
void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
+/**
+ * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP.
+ * @agent: the agent in question
+ * @return: true if agent is performing rmpp, false otherwise.
+ */
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent);
+
#endif /* IB_MAD_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7ccef342f72..ed44cc07a7b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1097,7 +1097,8 @@ struct ib_mr_attr {
enum ib_mr_rereg_flags {
IB_MR_REREG_TRANS = 1,
IB_MR_REREG_PD = (1<<1),
- IB_MR_REREG_ACCESS = (1<<2)
+ IB_MR_REREG_ACCESS = (1<<2),
+ IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
};
/**
@@ -1547,6 +1548,13 @@ struct ib_device {
u64 virt_addr,
int mr_access_flags,
struct ib_udata *udata);
+ int (*rereg_user_mr)(struct ib_mr *mr,
+ int flags,
+ u64 start, u64 length,
+ u64 virt_addr,
+ int mr_access_flags,
+ struct ib_pd *pd,
+ struct ib_udata *udata);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 9859355a7cf..750e5db7c6b 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -86,7 +86,9 @@ typedef struct sg_io_hdr
#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */
#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
/* user space (debug indirect IO) */
-#define SG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
+/* defaults:: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */
+#define SG_FLAG_Q_AT_TAIL 0x10
+#define SG_FLAG_Q_AT_HEAD 0x20
/* following 'info' values are "or"-ed together */
#define SG_INFO_OK_MASK 0x1
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index c9c3c044b32..981acf74b14 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -148,11 +148,13 @@ TRACE_EVENT(bcache_read,
);
TRACE_EVENT(bcache_write,
- TP_PROTO(struct bio *bio, bool writeback, bool bypass),
- TP_ARGS(bio, writeback, bypass),
+ TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio,
+ bool writeback, bool bypass),
+ TP_ARGS(c, inode, bio, writeback, bypass),
TP_STRUCT__entry(
- __field(dev_t, dev )
+ __array(char, uuid, 16 )
+ __field(u64, inode )
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
@@ -161,7 +163,8 @@ TRACE_EVENT(bcache_write,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ memcpy(__entry->uuid, c->sb.set_uuid, 16);
+ __entry->inode = inode;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
@@ -169,8 +172,8 @@ TRACE_EVENT(bcache_write,
__entry->bypass = bypass;
),
- TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
+ TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u",
+ __entry->uuid, __entry->inode,
__entry->rwbs, (unsigned long long)__entry->sector,
__entry->nr_sector, __entry->writeback, __entry->bypass)
);
@@ -258,9 +261,9 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
TP_ARGS(b)
);
-DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
- TP_PROTO(struct btree *b),
- TP_ARGS(b)
+DEFINE_EVENT(cache_set, bcache_btree_node_alloc_fail,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
);
DEFINE_EVENT(btree_node, bcache_btree_node_free,
diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
new file mode 100644
index 00000000000..b59b065e9e5
--- /dev/null
+++ b/include/trace/events/thp.h
@@ -0,0 +1,88 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thp
+
+#if !defined(_TRACE_THP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hugepage_invalidate,
+
+ TP_PROTO(unsigned long addr, unsigned long pte),
+ TP_ARGS(addr, pte),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ ),
+
+ TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx",
+ __entry->addr, __entry->pte)
+);
+
+TRACE_EVENT(hugepage_set_pmd,
+
+ TP_PROTO(unsigned long addr, unsigned long pmd),
+ TP_ARGS(addr, pmd),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pmd)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pmd = pmd;
+ ),
+
+ TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, __entry->pmd)
+);
+
+
+TRACE_EVENT(hugepage_update,
+
+ TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, unsigned long set),
+ TP_ARGS(addr, pte, clr, set),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ __field(unsigned long, clr)
+ __field(unsigned long, set)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ __entry->clr = clr;
+ __entry->set = set;
+
+ ),
+
+ TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
+);
+TRACE_EVENT(hugepage_splitting,
+
+ TP_PROTO(unsigned long addr, unsigned long pte),
+ TP_ARGS(addr, pte),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ ),
+
+ TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx",
+ __entry->addr, __entry->pte)
+);
+
+#endif /* _TRACE_THP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bsg.h b/include/uapi/linux/bsg.h
index 7a12e1c0f37..02986cf8b6f 100644
--- a/include/uapi/linux/bsg.h
+++ b/include/uapi/linux/bsg.h
@@ -10,12 +10,13 @@
#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2
/*
- * For flags member below
- * sg.h sg_io_hdr also has bits defined for it's flags member. However
- * none of these bits are implemented/used by bsg. The bits below are
- * allocated to not conflict with sg.h ones anyway.
+ * For flag constants below:
+ * sg.h sg_io_hdr also has bits defined for it's flags member. These
+ * two flag values (0x10 and 0x20) have the same meaning in sg.h . For
+ * bsg the BSG_FLAG_Q_AT_HEAD flag is ignored since it is the deafult.
*/
-#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_HEAD 0x20
struct sg_io_v4 {
__s32 guard; /* [i] 'Q' to differentiate from v3 */
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 6d8e61c4856..9ad67b26758 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -40,6 +40,7 @@
#define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#ifndef __KERNEL__
/* Old (deprecated) name for VIRTIO_BLK_F_WCE. */
@@ -77,6 +78,10 @@ struct virtio_blk_config {
/* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
__u8 wce;
+ __u8 unused;
+
+ /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+ __u16 num_queues;
} __attribute__((packed));
/*
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index d6fce1cbdb9..09f809f323e 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -191,6 +191,45 @@ struct ib_user_mad_reg_req {
__u8 rmpp_version;
};
+/**
+ * ib_user_mad_reg_req2 - MAD registration request
+ *
+ * @id - Set by the _kernel_; used by userspace to identify the
+ * registered agent in future requests.
+ * @qpn - Queue pair number; must be 0 or 1.
+ * @mgmt_class - Indicates which management class of MADs should be
+ * receive by the caller. This field is only required if
+ * the user wishes to receive unsolicited MADs, otherwise
+ * it should be 0.
+ * @mgmt_class_version - Indicates which version of MADs for the given
+ * management class to receive.
+ * @res - Ignored.
+ * @flags - additional registration flags; Must be in the set of
+ * flags defined in IB_USER_MAD_REG_FLAGS_CAP
+ * @method_mask - The caller wishes to receive unsolicited MADs for the
+ * methods whose bit(s) is(are) set.
+ * @oui - Indicates IEEE OUI to use when mgmt_class is a vendor
+ * class in the range from 0x30 to 0x4f. Otherwise not
+ * used.
+ * @rmpp_version - If set, indicates the RMPP version to use.
+ */
+enum {
+ IB_USER_MAD_USER_RMPP = (1 << 0),
+};
+#define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP)
+struct ib_user_mad_reg_req2 {
+ __u32 id;
+ __u32 qpn;
+ __u8 mgmt_class;
+ __u8 mgmt_class_version;
+ __u16 res;
+ __u32 flags;
+ __u64 method_mask[2];
+ __u32 oui;
+ __u8 rmpp_version;
+ __u8 reserved[3];
+};
+
#define IB_IOCTL_MAGIC 0x1b
#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
@@ -200,4 +239,7 @@ struct ib_user_mad_reg_req {
#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
+#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \
+ struct ib_user_mad_reg_req2)
+
#endif /* IB_USER_MAD_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index cbfdd4ca951..26daf55ff76 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -276,6 +276,22 @@ struct ib_uverbs_reg_mr_resp {
__u32 rkey;
};
+struct ib_uverbs_rereg_mr {
+ __u64 response;
+ __u32 mr_handle;
+ __u32 flags;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+};
+
+struct ib_uverbs_rereg_mr_resp {
+ __u32 lkey;
+ __u32 rkey;
+};
+
struct ib_uverbs_dereg_mr {
__u32 mr_handle;
};
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 99b80abf360..3066718eb12 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -34,6 +34,7 @@
#define RDMA_USER_CM_H
#include <linux/types.h>
+#include <linux/socket.h>
#include <linux/in6.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_sa.h>
diff --git a/init/Kconfig b/init/Kconfig
index 44f9ed3dae2..e84c6423a2e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -268,7 +268,7 @@ config CROSS_MEMORY_ATTACH
help
Enabling this option adds the system calls process_vm_readv and
process_vm_writev which allow a process with the correct privileges
- to directly read from or write to to another process's address space.
+ to directly read from or write to another process' address space.
See the man page for more details.
config FHANDLE
diff --git a/kernel/fork.c b/kernel/fork.c
index 1380d8ace33..0cf9cdb6e49 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1105,7 +1105,7 @@ static void copy_seccomp(struct task_struct *p)
* needed because this new task is not yet running and cannot
* be racing exec.
*/
- BUG_ON(!spin_is_locked(&current->sighand->siglock));
+ assert_spin_locked(&current->sighand->siglock);
/* Ref-count the new filter user, and assign it. */
get_seccomp_filter(current);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index de1a6bb6861..e04c455a0e3 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
+/* Return log buffer address */
+char *log_buf_addr_get(void)
+{
+ return log_buf;
+}
+
+/* Return log buffer size */
+u32 log_buf_len_get(void)
+{
+ return log_buf_len;
+}
+
/* human readable text of the record */
static char *log_text(const struct printk_log *msg)
{
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 25b0043f475..44eb005c669 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -203,7 +203,7 @@ static u32 seccomp_run_filters(int syscall)
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
- BUG_ON(!spin_is_locked(&current->sighand->siglock));
+ assert_spin_locked(&current->sighand->siglock);
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
return false;
@@ -214,7 +214,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
static inline void seccomp_assign_mode(struct task_struct *task,
unsigned long seccomp_mode)
{
- BUG_ON(!spin_is_locked(&task->sighand->siglock));
+ assert_spin_locked(&task->sighand->siglock);
task->seccomp.mode = seccomp_mode;
/*
@@ -253,7 +253,7 @@ static inline pid_t seccomp_can_sync_threads(void)
struct task_struct *thread, *caller;
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
- BUG_ON(!spin_is_locked(&current->sighand->siglock));
+ assert_spin_locked(&current->sighand->siglock);
/* Validate all threads being eligible for synchronization. */
caller = current;
@@ -294,7 +294,7 @@ static inline void seccomp_sync_threads(void)
struct task_struct *thread, *caller;
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
- BUG_ON(!spin_is_locked(&current->sighand->siglock));
+ assert_spin_locked(&current->sighand->siglock);
/* Synchronize all threads. */
caller = current;
@@ -464,7 +464,7 @@ static long seccomp_attach_filter(unsigned int flags,
unsigned long total_insns;
struct seccomp_filter *walker;
- BUG_ON(!spin_is_locked(&current->sighand->siglock));
+ assert_spin_locked(&current->sighand->siglock);
/* Validate resulting filter length. */
total_insns = filter->prog->len;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f36b02838a4..fb4a9c2cf8d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -338,10 +338,11 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
static inline void update_vsyscall(struct timekeeper *tk)
{
- struct timespec xt;
+ struct timespec xt, wm;
xt = timespec64_to_timespec(tk_xtime(tk));
- update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult,
+ wm = timespec64_to_timespec(tk->wall_to_monotonic);
+ update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
tk->tkr.cycle_last);
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cb45f59685e..07c28323f88 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -143,6 +143,30 @@ config DEBUG_INFO_REDUCED
DEBUG_INFO build and compile times are reduced too.
Only works with newer gcc versions.
+config DEBUG_INFO_SPLIT
+ bool "Produce split debuginfo in .dwo files"
+ depends on DEBUG_INFO
+ help
+ Generate debug info into separate .dwo files. This significantly
+ reduces the build directory size for builds with DEBUG_INFO,
+ because it stores the information only once on disk in .dwo
+ files instead of multiple times in object files and executables.
+ In addition the debug information is also compressed.
+
+ Requires recent gcc (4.7+) and recent gdb/binutils.
+ Any tool that packages or reads debug information would need
+ to know about the .dwo files and include them.
+ Incompatible with older versions of ccache.
+
+config DEBUG_INFO_DWARF4
+ bool "Generate dwarf4 debuginfo"
+ depends on DEBUG_INFO
+ help
+ Generate dwarf4 debug info. This requires recent versions
+ of gcc and gdb. It makes the debug information larger.
+ But it significantly improves the success of resolving
+ variables in gdb on optimized code.
+
config ENABLE_WARN_DEPRECATED
bool "Enable __deprecated logic"
default y
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 4a83ecd0365..852c81e3ba9 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -169,7 +169,7 @@ out_fail:
return NULL;
}
-void lc_free_by_index(struct lru_cache *lc, unsigned i)
+static void lc_free_by_index(struct lru_cache *lc, unsigned i)
{
void *p = lc->lc_element[i];
WARN_ON(!p);
@@ -643,9 +643,10 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index)
* lc_dump - Dump a complete LRU cache to seq in textual form.
* @lc: the lru cache to operate on
* @seq: the &struct seq_file pointer to seq_printf into
- * @utext: user supplied "heading" or other info
+ * @utext: user supplied additional "heading" or other info
* @detail: function pointer the user may provide to dump further details
- * of the object the lc_element is embedded in.
+ * of the object the lc_element is embedded in. May be NULL.
+ * Note: a leading space ' ' and trailing newline '\n' is implied.
*/
void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *))
@@ -654,16 +655,18 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext
struct lc_element *e;
int i;
- seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+ seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext);
for (i = 0; i < nr_elements; i++) {
e = lc_element_by_index(lc, i);
- if (e->lc_number == LC_FREE) {
- seq_printf(seq, "\t%2d: FREE\n", i);
- } else {
- seq_printf(seq, "\t%2d: %4u %4u ", i,
- e->lc_number, e->refcnt);
+ if (e->lc_number != e->lc_new_number)
+ seq_printf(seq, "\t%5d: %6d %8d %6d ",
+ i, e->lc_number, e->lc_new_number, e->refcnt);
+ else
+ seq_printf(seq, "\t%5d: %6d %-8s %6d ",
+ i, e->lc_number, "-\"-", e->refcnt);
+ if (detail)
detail(seq, e);
- }
+ seq_putc(seq, '\n');
}
}
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e6940cf1662..a2c78810ebc 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -38,16 +38,10 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht)
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
#endif
-/**
- * rht_obj - cast hash head to outer object
- * @ht: hash table
- * @he: hashed node
- */
-void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
+static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
{
return (void *) he - ht->p.head_offset;
}
-EXPORT_SYMBOL_GPL(rht_obj);
static u32 __hashfn(const struct rhashtable *ht, const void *key,
u32 len, u32 hsize)
@@ -386,7 +380,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
* deletion when combined with walking or lookup.
*/
void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
- struct rhash_head **pprev, gfp_t flags)
+ struct rhash_head __rcu **pprev, gfp_t flags)
{
struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 9aae6f47433..9eebfadeeee 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -275,6 +275,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
break;
+ val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx]));
ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
break;
default:
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4c5b8ba0f84..e4853b50cf4 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -833,7 +833,6 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
loff_t *l)
{
struct hlist_node *e = state->node;
- struct lec_arp_table *tmp;
if (!e)
e = tbl->first;
@@ -842,9 +841,7 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
--*l;
}
- tmp = container_of(e, struct lec_arp_table, next);
-
- hlist_for_each_entry_from(tmp, next) {
+ for (; e; e = e->next) {
if (--*l < 0)
break;
}
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d8e5d0c2ebb..1ba23f5018e 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -50,12 +50,12 @@ static void svc_disconnect(struct atm_vcc *vcc)
pr_debug("%p\n", vcc);
if (test_bit(ATM_VF_REGIS, &vcc->flags)) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_close, NULL, NULL, NULL);
- while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
+ for (;;) {
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd)
+ break;
schedule();
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_UNINTERRUPTIBLE);
}
finish_wait(sk_sleep(sk), &wait);
}
@@ -126,11 +126,12 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr,
}
vcc->local = *addr;
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */
@@ -202,15 +203,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
}
vcc->remote = *addr;
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote);
if (flags & O_NONBLOCK) {
- finish_wait(sk_sleep(sk), &wait);
sock->state = SS_CONNECTING;
error = -EINPROGRESS;
goto out;
}
error = 0;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
schedule();
if (!signal_pending(current)) {
@@ -297,11 +297,12 @@ static int svc_listen(struct socket *sock, int backlog)
goto out;
}
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd) {
@@ -387,15 +388,15 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
}
/* wait should be short, so we ignore the non-blocking flag */
set_bit(ATM_VF_WAITING, &new_vcc->flags);
- prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
- TASK_UNINTERRUPTIBLE);
sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL);
- while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) {
+ for (;;) {
+ prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &new_vcc->flags) || !sigd)
+ break;
release_sock(sk);
schedule();
lock_sock(sk);
- prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
- TASK_UNINTERRUPTIBLE);
}
finish_wait(sk_sleep(sk_atm(new_vcc)), &wait);
if (!sigd) {
@@ -433,12 +434,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
DEFINE_WAIT(wait);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) &&
- !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) ||
+ test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd) {
+ break;
+ }
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd)
@@ -529,18 +532,18 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
lock_sock(sk);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq(vcc, as_addparty, NULL, NULL,
(struct sockaddr_atmsvc *) sockaddr);
if (flags & O_NONBLOCK) {
- finish_wait(sk_sleep(sk), &wait);
error = -EINPROGRESS;
goto out;
}
pr_debug("added wait queue\n");
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
error = xchg(&sk->sk_err_soft, 0);
@@ -558,11 +561,12 @@ static int svc_dropparty(struct socket *sock, int ep_ref)
lock_sock(sk);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 181b70ebd96..541f26a67ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1188,13 +1188,6 @@ new_segment:
goto wait_for_memory;
/*
- * All packets are restored as if they have
- * already been sent.
- */
- if (tp->repair)
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
- /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1203,6 +1196,13 @@ new_segment:
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
+
+ /* All packets are restored as if they have
+ * already been sent. skb_mstamp isn't set to
+ * avoid wrong rtt estimation.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
}
/* Try to append data to the end of skb. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a3d47af0190..a906e0200ff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2687,7 +2687,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
*/
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2713,12 +2712,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
- icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
@@ -3050,10 +3046,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt.v64 = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u8 sacked = scb->sacked;
u32 acked_pcount;
+ if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+ between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 ||
@@ -3107,11 +3108,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->retrans_stamp = 0;
}
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) &&
- between(skb_shinfo(skb)->tskey, prior_snd_una,
- tp->snd_una + 1))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
-
if (!fully_acked)
break;
@@ -3405,8 +3401,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
- if (after(ack, prior_snd_una))
+ if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
+ icsk->icsk_retransmits = 0;
+ }
prior_fackets = tp->fackets_out;
@@ -5979,12 +5977,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
* timewait bucket, so that all the necessary checks
* are made in the function processing timewait state.
*/
- if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
+ if (tcp_death_row.sysctl_tw_recycle) {
bool strict;
dst = af_ops->route_req(sk, &fl, req, &strict);
+
if (dst && strict &&
- !tcp_peer_is_proven(req, dst, true)) {
+ !tcp_peer_is_proven(req, dst, true,
+ tmp_opt.saw_tstamp)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -5993,7 +5993,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false)) {
+ !tcp_peer_is_proven(req, dst, false,
+ tmp_opt.saw_tstamp)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dceff5fe8e6..cd17f009aed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
* It can be called through tcp_release_cb() if socket was owned by user
* at the time tcp_v4_err() was called to handle ICMP message.
*/
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -302,6 +302,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -1787,6 +1788,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
@@ -2406,7 +2408,6 @@ struct proto tcp_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v4_mtu_reduced,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0d54e59b9ea..ed9c9a91851 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -576,7 +576,8 @@ reset:
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
+ bool paws_check, bool timestamps)
{
struct tcp_metrics_block *tm;
bool ret;
@@ -589,7 +590,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool pa
if (paws_check) {
if (tm &&
(u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
- (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
+ ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
+ !timestamps))
ret = false;
else
ret = true;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8fcfc91964e..5a7c41fbc6d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -800,7 +800,7 @@ void tcp_release_cb(struct sock *sk)
__sock_put(sk);
}
if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
- sk->sk_prot->mtu_reduced(sk);
+ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
}
@@ -1069,6 +1069,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
tcp_verify_left_out(tp);
}
+static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+ !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
+ struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
+ u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
+
+ shinfo->tx_flags &= ~tsflags;
+ shinfo2->tx_flags |= tsflags;
+ swap(shinfo->tskey, shinfo2->tskey);
+ }
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -1136,6 +1151,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
*/
TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
buff->tstamp = skb->tstamp;
+ tcp_fragment_tstamp(skb, buff);
old_factor = tcp_skb_pcount(skb);
@@ -1652,6 +1668,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
+ tcp_fragment_tstamp(skb, buff);
/* Fix up tso_factor for both original and new SKB. */
tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1917,8 +1934,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+ /* "when" is used as a start point for the retransmit timer */
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
goto repair; /* Skip network transmission */
+ }
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2e9ba035fb5..6163f851dc0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f2ce9550239..29964c3d363 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1595,6 +1595,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1625,6 +1626,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1864,7 +1866,6 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 9ea0c933b9f..a37998c6273 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -622,7 +622,7 @@ void irlap_send_rd_frame(struct irlap_cb *self)
frame = (struct rd_frame *)skb_put(tx_skb, 2);
frame->caddr = self->caddr;
- frame->caddr = RD_RSP | PF_BIT;
+ frame->control = RD_RSP | PF_BIT;
irlap_queue_xmit(self, tx_skb);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2e152e5f218..c416725d28c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2921,6 +2921,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
}
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
rcu_read_lock();
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2970,6 +2971,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
rcu_read_unlock();
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 702fb21bfe1..6d8f2ec481d 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -137,8 +137,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
- if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids))
+ if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
+ kfree(vport);
return ERR_PTR(-EINVAL);
+ }
vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->percpu_stats) {
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 122f95c9586..8a9a4e1c7ea 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -215,11 +215,13 @@ else
arg-check = $(if $(strip $(cmd_$@)),,1)
endif
-# >'< substitution is for echo to work,
-# >$< substitution to preserve $ when reloading .cmd file
-# note: when using inline perl scripts [perl -e '...$$t=1;...']
-# in $(cmd_xxx) double $$ your perl vars
-make-cmd = $(subst \\,\\\\,$(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1))))))
+# Replace >$< with >$$< to preserve $ when reloading the .cmd file
+# (needed for make)
+# Replace >#< with >\#< to avoid starting a comment in the .cmd file
+# (needed for make)
+# Replace >'< with >'\''< to be able to enclose the whole string in '...'
+# (needed for the shell)
+make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
# Find any prerequisites that is newer than target or that does not exist.
# PHONY targets skipped in both cases.
@@ -230,7 +232,7 @@ any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
@set -e; \
$(echo-cmd) $(cmd_$(1)); \
- echo 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
+ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
# Execute the command and also postprocess generated .d dependencies file.
if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ), \
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index 686cb0d31c7..a651cee84f2 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -40,8 +40,8 @@ subdir-ymn := $(addprefix $(obj)/,$(subdir-ymn))
# build a list of files to remove, usually relative to the current
# directory
-__clean-files := $(extra-y) $(always) \
- $(targets) $(clean-files) \
+__clean-files := $(extra-y) $(extra-m) $(extra-) \
+ $(always) $(targets) $(clean-files) \
$(host-progs) \
$(hostprogs-y) $(hostprogs-m) $(hostprogs-)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 65643506c71..f734033af21 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -26,16 +26,6 @@ warning-1 += $(call cc-option, -Wmissing-include-dirs)
warning-1 += $(call cc-option, -Wunused-but-set-variable)
warning-1 += $(call cc-disable-warning, missing-field-initializers)
-# Clang
-warning-1 += $(call cc-disable-warning, initializer-overrides)
-warning-1 += $(call cc-disable-warning, unused-value)
-warning-1 += $(call cc-disable-warning, format)
-warning-1 += $(call cc-disable-warning, unknown-warning-option)
-warning-1 += $(call cc-disable-warning, sign-compare)
-warning-1 += $(call cc-disable-warning, format-zero-length)
-warning-1 += $(call cc-disable-warning, uninitialized)
-warning-1 += $(call cc-option, -fcatch-undefined-behavior)
-
warning-2 := -Waggregate-return
warning-2 += -Wcast-align
warning-2 += -Wdisabled-optimization
@@ -64,4 +54,15 @@ ifeq ("$(strip $(warning))","")
endif
KBUILD_CFLAGS += $(warning)
+else
+
+ifeq ($(COMPILER),clang)
+KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
+KBUILD_CFLAGS += $(call cc-disable-warning, format)
+KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
+KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length)
+KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized)
+endif
endif
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 66893643fd7..ab5980f9171 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -20,21 +20,12 @@
# Will compile qconf as a C++ program, and menu as a C program.
# They are linked as C++ code to the executable qconf
-# hostprogs-y := conf
-# conf-objs := conf.o libkconfig.so
-# libkconfig-objs := expr.o type.o
-# Will create a shared library named libkconfig.so that consists of
-# expr.o and type.o (they are both compiled as C code and the object files
-# are made as position independent code).
-# conf.c is compiled as a C program, and conf.o is linked together with
-# libkconfig.so as the executable conf.
-# Note: Shared libraries consisting of C++ files are not supported
-
__hostprogs := $(sort $(hostprogs-y) $(hostprogs-m))
# C code
# Executables compiled from a single .c file
-host-csingle := $(foreach m,$(__hostprogs),$(if $($(m)-objs),,$(m)))
+host-csingle := $(foreach m,$(__hostprogs), \
+ $(if $($(m)-objs)$($(m)-cxxobjs),,$(m)))
# C executables linked based on several .o files
host-cmulti := $(foreach m,$(__hostprogs),\
@@ -44,33 +35,17 @@ host-cmulti := $(foreach m,$(__hostprogs),\
host-cobjs := $(sort $(foreach m,$(__hostprogs),$($(m)-objs)))
# C++ code
-# C++ executables compiled from at least on .cc file
+# C++ executables compiled from at least one .cc file
# and zero or more .c files
host-cxxmulti := $(foreach m,$(__hostprogs),$(if $($(m)-cxxobjs),$(m)))
# C++ Object (.o) files compiled from .cc files
host-cxxobjs := $(sort $(foreach m,$(host-cxxmulti),$($(m)-cxxobjs)))
-# Shared libaries (only .c supported)
-# Shared libraries (.so) - all .so files referenced in "xxx-objs"
-host-cshlib := $(sort $(filter %.so, $(host-cobjs)))
-# Remove .so files from "xxx-objs"
-host-cobjs := $(filter-out %.so,$(host-cobjs))
-
-#Object (.o) files used by the shared libaries
-host-cshobjs := $(sort $(foreach m,$(host-cshlib),$($(m:.so=-objs))))
-
# output directory for programs/.o files
-# hostprogs-y := tools/build may have been specified. Retrieve directory
-host-objdirs := $(foreach f,$(__hostprogs), $(if $(dir $(f)),$(dir $(f))))
-# directory of .o files from prog-objs notation
-host-objdirs += $(foreach f,$(host-cmulti), \
- $(foreach m,$($(f)-objs), \
- $(if $(dir $(m)),$(dir $(m)))))
-# directory of .o files from prog-cxxobjs notation
-host-objdirs += $(foreach f,$(host-cxxmulti), \
- $(foreach m,$($(f)-cxxobjs), \
- $(if $(dir $(m)),$(dir $(m)))))
+# hostprogs-y := tools/build may have been specified.
+# Retrieve also directory of .o files from prog-objs or prog-cxxobjs notation
+host-objdirs := $(dir $(__hostprogs) $(host-cobjs) $(host-cxxobjs))
host-objdirs := $(strip $(sort $(filter-out ./,$(host-objdirs))))
@@ -81,8 +56,6 @@ host-cmulti := $(addprefix $(obj)/,$(host-cmulti))
host-cobjs := $(addprefix $(obj)/,$(host-cobjs))
host-cxxmulti := $(addprefix $(obj)/,$(host-cxxmulti))
host-cxxobjs := $(addprefix $(obj)/,$(host-cxxobjs))
-host-cshlib := $(addprefix $(obj)/,$(host-cshlib))
-host-cshobjs := $(addprefix $(obj)/,$(host-cshobjs))
host-objdirs := $(addprefix $(obj)/,$(host-objdirs))
obj-dirs += $(host-objdirs)
@@ -123,7 +96,7 @@ quiet_cmd_host-cmulti = HOSTLD $@
cmd_host-cmulti = $(HOSTCC) $(HOSTLDFLAGS) -o $@ \
$(addprefix $(obj)/,$($(@F)-objs)) \
$(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
-$(host-cmulti): $(obj)/%: $(host-cobjs) $(host-cshlib) FORCE
+$(host-cmulti): $(obj)/%: $(host-cobjs) FORCE
$(call if_changed,host-cmulti)
# Create .o file from a single .c file
@@ -140,7 +113,7 @@ quiet_cmd_host-cxxmulti = HOSTLD $@
$(foreach o,objs cxxobjs,\
$(addprefix $(obj)/,$($(@F)-$(o)))) \
$(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
-$(host-cxxmulti): $(obj)/%: $(host-cobjs) $(host-cxxobjs) $(host-cshlib) FORCE
+$(host-cxxmulti): $(obj)/%: $(host-cobjs) $(host-cxxobjs) FORCE
$(call if_changed,host-cxxmulti)
# Create .o file from a single .cc (C++) file
@@ -149,21 +122,5 @@ quiet_cmd_host-cxxobjs = HOSTCXX $@
$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
$(call if_changed_dep,host-cxxobjs)
-# Compile .c file, create position independent .o file
-# host-cshobjs -> .o
-quiet_cmd_host-cshobjs = HOSTCC -fPIC $@
- cmd_host-cshobjs = $(HOSTCC) $(hostc_flags) -fPIC -c -o $@ $<
-$(host-cshobjs): $(obj)/%.o: $(src)/%.c FORCE
- $(call if_changed_dep,host-cshobjs)
-
-# Link a shared library, based on position independent .o files
-# *.o -> .so shared library (host-cshlib)
-quiet_cmd_host-cshlib = HOSTLLD -shared $@
- cmd_host-cshlib = $(HOSTCC) $(HOSTLDFLAGS) -shared -o $@ \
- $(addprefix $(obj)/,$($(@F:.so=-objs))) \
- $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
-$(host-cshlib): $(obj)/%: $(host-cshobjs) FORCE
- $(call if_changed,host-cshlib)
-
targets += $(host-csingle) $(host-cmulti) $(host-cobjs)\
- $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs)
+ $(host-cxxmulti) $(host-cxxobjs)
diff --git a/scripts/coccinelle/api/alloc/alloc_cast.cocci b/scripts/coccinelle/api/alloc/alloc_cast.cocci
new file mode 100644
index 00000000000..6c308ee19b3
--- /dev/null
+++ b/scripts/coccinelle/api/alloc/alloc_cast.cocci
@@ -0,0 +1,72 @@
+/// Remove casting the values returned by memory allocation functions
+/// like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc.
+///
+//# This makes an effort to find cases of casting of values returned by
+//# kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc,
+//# kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes
+//# the casting as it is not required. The result in the patch case may
+//#need some reformatting.
+//
+// Confidence: High
+// Copyright: 2014, Himangi Saraogi GPLv2.
+// Comments:
+// Options: --no-includes --include-headers
+//
+
+virtual context
+virtual patch
+virtual org
+virtual report
+
+//----------------------------------------------------------
+// For context mode
+//----------------------------------------------------------
+
+@depends on context@
+type T;
+@@
+
+* (T *)
+ \(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
+ kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...)
+
+//----------------------------------------------------------
+// For patch mode
+//----------------------------------------------------------
+
+@depends on patch@
+type T;
+@@
+
+- (T *)
+ (\(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
+ kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...))
+
+//----------------------------------------------------------
+// For org and report mode
+//----------------------------------------------------------
+
+@r depends on org || report@
+type T;
+position p;
+@@
+
+ (T@p *)\(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
+ kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...)
+
+@script:python depends on org@
+p << r.p;
+t << r.T;
+@@
+
+coccilib.org.print_safe_todo(p[0], t)
+
+@script:python depends on report@
+p << r.p;
+t << r.T;
+@@
+
+msg="WARNING: casting value returned by memory allocation function to (%s *) is useless." % (t)
+coccilib.report.print_report(p[0], msg)
+
+
diff --git a/scripts/coccinelle/misc/array_size.cocci b/scripts/coccinelle/misc/array_size.cocci
new file mode 100644
index 00000000000..81e279cd347
--- /dev/null
+++ b/scripts/coccinelle/misc/array_size.cocci
@@ -0,0 +1,87 @@
+/// Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element
+///
+//# This makes an effort to find cases where ARRAY_SIZE can be used such as
+//# where there is a division of sizeof the array by the sizeof its first
+//# element or by any indexed element or the element type. It replaces the
+//# division of the two sizeofs by ARRAY_SIZE.
+//
+// Confidence: High
+// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
+// Comments:
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@i@
+@@
+
+#include <linux/kernel.h>
+
+//----------------------------------------------------------
+// For context mode
+//----------------------------------------------------------
+
+@depends on i&&context@
+type T;
+T[] E;
+@@
+(
+* (sizeof(E)/sizeof(*E))
+|
+* (sizeof(E)/sizeof(E[...]))
+|
+* (sizeof(E)/sizeof(T))
+)
+
+//----------------------------------------------------------
+// For patch mode
+//----------------------------------------------------------
+
+@depends on i&&patch@
+type T;
+T[] E;
+@@
+(
+- (sizeof(E)/sizeof(*E))
++ ARRAY_SIZE(E)
+|
+- (sizeof(E)/sizeof(E[...]))
++ ARRAY_SIZE(E)
+|
+- (sizeof(E)/sizeof(T))
++ ARRAY_SIZE(E)
+)
+
+//----------------------------------------------------------
+// For org and report mode
+//----------------------------------------------------------
+
+@r@
+type T;
+T[] E;
+position p;
+@@
+(
+ (sizeof(E)@p /sizeof(*E))
+|
+ (sizeof(E)@p /sizeof(E[...]))
+|
+ (sizeof(E)@p /sizeof(T))
+)
+
+@script:python depends on i&&org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING should use ARRAY_SIZE")
+
+@script:python depends on i&&report@
+p << r.p;
+@@
+
+msg="WARNING: Use ARRAY_SIZE"
+coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/coccinelle/misc/badty.cocci b/scripts/coccinelle/misc/badty.cocci
new file mode 100644
index 00000000000..2fc06fc7192
--- /dev/null
+++ b/scripts/coccinelle/misc/badty.cocci
@@ -0,0 +1,76 @@
+/// Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element
+///
+//# This makes an effort to find cases where the argument to sizeof is wrong
+//# in memory allocation functions by checking the type of the allocated memory
+//# when it is a double pointer and ensuring the sizeof argument takes a pointer
+//# to the the memory being allocated. There are false positives in cases the
+//# sizeof argument is not used in constructing the return value. The result
+//# may need some reformatting.
+//
+// Confidence: Moderate
+// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
+// Comments:
+// Options:
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+//----------------------------------------------------------
+// For context mode
+//----------------------------------------------------------
+
+@depends on context disable sizeof_type_expr@
+type T;
+T **x;
+@@
+
+ x =
+ <+...sizeof(
+* T
+ )...+>
+
+//----------------------------------------------------------
+// For patch mode
+//----------------------------------------------------------
+
+@depends on patch disable sizeof_type_expr@
+type T;
+T **x;
+@@
+
+ x =
+ <+...sizeof(
+- T
++ *x
+ )...+>
+
+//----------------------------------------------------------
+// For org and report mode
+//----------------------------------------------------------
+
+@r disable sizeof_type_expr@
+type T;
+T **x;
+position p;
+@@
+
+ x =
+ <+...sizeof(
+ T@p
+ )...+>
+
+@script:python depends on org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING sizeof argument should be pointer type, not structure type")
+
+@script:python depends on report@
+p << r.p;
+@@
+
+msg="WARNING: Use correct pointer type argument for sizeof"
+coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci b/scripts/coccinelle/misc/bugon.cocci
index bd5d08b882e..556456ca761 100644
--- a/scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci
+++ b/scripts/coccinelle/misc/bugon.cocci
@@ -1,20 +1,17 @@
+/// Use BUG_ON instead of a if condition followed by BUG.
///
-/// Casting (void *) value returned by kmalloc is useless
-/// as mentioned in Documentation/CodingStyle, Chap 14.
-///
-// Confidence: High
-// Copyright: 2009,2010 Nicolas Palix, DIKU. GPLv2.
-// URL: http://coccinelle.lip6.fr/
-// Options: --no-includes --include-headers
-//
-// Keywords: kmalloc, kzalloc, kcalloc
-// Version min: < 2.6.12 kmalloc
-// Version min: < 2.6.12 kcalloc
-// Version min: 2.6.14 kzalloc
+//# This makes an effort to find cases where BUG() follows an if
+//# condition on an expression and replaces the if condition and BUG()
+//# with a BUG_ON having the conditional expression of the if statement
+//# as argument.
//
+// Confidence: High
+// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
+// Comments:
+// Options: --no-includes, --include-headers
-virtual context
virtual patch
+virtual context
virtual org
virtual report
@@ -23,45 +20,43 @@ virtual report
//----------------------------------------------------------
@depends on context@
-type T;
+expression e;
@@
-* (T *)
- \(kmalloc\|kzalloc\|kcalloc\)(...)
+*if (e) BUG();
//----------------------------------------------------------
// For patch mode
//----------------------------------------------------------
@depends on patch@
-type T;
+expression e;
@@
-- (T *)
- \(kmalloc\|kzalloc\|kcalloc\)(...)
+-if (e) BUG();
++BUG_ON(e);
//----------------------------------------------------------
// For org and report mode
//----------------------------------------------------------
-@r depends on org || report@
-type T;
+@r@
+expression e;
position p;
@@
- (T@p *)\(kmalloc\|kzalloc\|kcalloc\)(...)
+ if (e) BUG@p ();
@script:python depends on org@
p << r.p;
-t << r.T;
@@
-coccilib.org.print_safe_todo(p[0], t)
+coccilib.org.print_todo(p[0], "WARNING use BUG_ON")
@script:python depends on report@
p << r.p;
-t << r.T;
@@
-msg="WARNING: casting value returned by k[cmz]alloc to (%s *) is useless." % (t)
+msg="WARNING: Use BUG_ON"
coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/coccinelle/null/badzero.cocci b/scripts/coccinelle/null/badzero.cocci
index d79baf7220e..5551da2b4fe 100644
--- a/scripts/coccinelle/null/badzero.cocci
+++ b/scripts/coccinelle/null/badzero.cocci
@@ -10,7 +10,7 @@
// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. GPLv2.
// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. GPLv2.
// URL: http://coccinelle.lip6.fr/
-// Comments:
+// Comments: Requires Coccinelle version 1.0.0-rc20 or later
// Options:
virtual patch
@@ -19,6 +19,7 @@ virtual org
virtual report
@initialize:ocaml@
+@@
let negtable = Hashtbl.create 101
@depends on patch@
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 7a43c0c3831..8a431bcb056 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -992,9 +992,9 @@ static int snd_pmac_detect(struct snd_pmac *chip)
return -ENODEV;
if (!sound) {
- sound = of_find_node_by_name(NULL, "sound");
- while (sound && sound->parent != chip->node)
- sound = of_find_node_by_name(sound, "sound");
+ for_each_node_by_name(sound, "sound")
+ if (sound->parent == chip->node)
+ break;
}
if (! sound) {
of_node_put(chip->node);