Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/drbd/Makefile | 1
-rw-r--r-- drivers/block/drbd/drbd_actlog.c | 518
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c | 150
-rw-r--r-- drivers/block/drbd/drbd_debugfs.c | 958
-rw-r--r-- drivers/block/drbd/drbd_debugfs.h | 39
-rw-r--r-- drivers/block/drbd/drbd_int.h | 383
-rw-r--r-- drivers/block/drbd/drbd_interval.h | 4
-rw-r--r-- drivers/block/drbd/drbd_main.c | 302
-rw-r--r-- drivers/block/drbd/drbd_nl.c | 110
-rw-r--r-- drivers/block/drbd/drbd_proc.c | 125
-rw-r--r-- drivers/block/drbd/drbd_receiver.c | 316
-rw-r--r-- drivers/block/drbd/drbd_req.c | 527
-rw-r--r-- drivers/block/drbd/drbd_req.h | 1
-rw-r--r-- drivers/block/drbd/drbd_state.c | 90
-rw-r--r-- drivers/block/drbd/drbd_worker.c | 348
-rw-r--r-- drivers/block/virtio_blk.c | 104
-rw-r--r-- drivers/cpufreq/pmac64-cpufreq.c | 3
-rw-r--r-- drivers/crypto/nx/nx-842.c | 30
-rw-r--r-- drivers/edac/cell_edac.c | 3
-rw-r--r-- drivers/hwmon/adm1025.c | 3
-rw-r--r-- drivers/hwmon/adm1026.c | 3
-rw-r--r-- drivers/hwmon/ads1015.c | 2
-rw-r--r-- drivers/hwmon/asb100.c | 4
-rw-r--r-- drivers/hwmon/dme1737.c | 33
-rw-r--r-- drivers/hwmon/emc6w201.c | 4
-rw-r--r-- drivers/hwmon/hih6130.c | 3
-rw-r--r-- drivers/hwmon/lm87.c | 4
-rw-r--r-- drivers/hwmon/lm92.c | 13
-rw-r--r-- drivers/hwmon/pc87360.c | 3
-rw-r--r-- drivers/hwmon/tmp103.c | 7
-rw-r--r-- drivers/hwmon/vt1211.c | 3
-rw-r--r-- drivers/hwmon/w83627hf.c | 3
-rw-r--r-- drivers/hwmon/w83791d.c | 3
-rw-r--r-- drivers/hwmon/w83793.c | 3
-rw-r--r-- drivers/hwspinlock/Kconfig | 2
-rw-r--r-- drivers/hwspinlock/omap_hwspinlock.c | 27
-rw-r--r-- drivers/infiniband/core/agent.c | 16
-rw-r--r-- drivers/infiniband/core/cm.c | 5
-rw-r--r-- drivers/infiniband/core/iwcm.c | 27
-rw-r--r-- drivers/infiniband/core/mad.c | 283
-rw-r--r-- drivers/infiniband/core/mad_priv.h | 3
-rw-r--r-- drivers/infiniband/core/sa_query.c | 2
-rw-r--r-- drivers/infiniband/core/user_mad.c | 188
-rw-r--r-- drivers/infiniband/core/uverbs.h | 1
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 93
-rw-r--r-- drivers/infiniband/core/uverbs_main.c | 1
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_cq.c | 7
-rw-r--r-- drivers/infiniband/hw/cxgb4/ev.c | 1
-rw-r--r-- drivers/infiniband/hw/cxgb4/qp.c | 37
-rw-r--r-- drivers/infiniband/hw/cxgb4/t4.h | 11
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mad.c | 14
-rw-r--r-- drivers/infiniband/hw/mlx4/mad.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 8
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 4
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 88
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mad.c | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma.h | 26
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 6
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 227
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 83
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 295
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 36
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.c | 2
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 8
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 6
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 133
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c | 47
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h | 8
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c | 128
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 48
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.c | 3
-rw-r--r-- drivers/md/bcache/alloc.c | 2
-rw-r--r-- drivers/md/bcache/bcache.h | 4
-rw-r--r-- drivers/md/bcache/bset.c | 2
-rw-r--r-- drivers/md/bcache/bset.h | 2
-rw-r--r-- drivers/md/bcache/btree.c | 50
-rw-r--r-- drivers/md/bcache/btree.h | 5
-rw-r--r-- drivers/md/bcache/extents.c | 13
-rw-r--r-- drivers/md/bcache/extents.h | 1
-rw-r--r-- drivers/md/bcache/journal.c | 24
-rw-r--r-- drivers/md/bcache/request.c | 3
-rw-r--r-- drivers/md/bcache/super.c | 57
-rw-r--r-- drivers/md/bcache/util.h | 4
-rw-r--r-- drivers/md/bcache/writeback.c | 14
-rw-r--r-- drivers/md/bcache/writeback.h | 3
-rw-r--r-- drivers/md/dm-cache-metadata.c | 4
-rw-r--r-- drivers/md/dm-cache-metadata.h | 8
-rw-r--r-- drivers/md/dm-cache-target.c | 128
-rw-r--r-- drivers/md/dm-crypt.c | 41
-rw-r--r-- drivers/md/dm-io.c | 77
-rw-r--r-- drivers/md/dm-mpath.c | 6
-rw-r--r-- drivers/md/dm-switch.c | 67
-rw-r--r-- drivers/md/dm-table.c | 86
-rw-r--r-- drivers/md/dm-thin.c | 181
-rw-r--r-- drivers/md/dm.h | 1
-rw-r--r-- drivers/mfd/rtsx_usb.c | 1
-rw-r--r-- drivers/mmc/card/block.c | 6
-rw-r--r-- drivers/mmc/core/bus.c | 10
-rw-r--r-- drivers/mmc/core/core.c | 3
-rw-r--r-- drivers/mmc/core/mmc.c | 11
-rw-r--r-- drivers/mmc/core/quirks.c | 2
-rw-r--r-- drivers/mmc/core/sd_ops.c | 3
-rw-r--r-- drivers/mmc/host/Kconfig | 28
-rw-r--r-- drivers/mmc/host/Makefile | 1
-rw-r--r-- drivers/mmc/host/dw_mmc.c | 98
-rw-r--r-- drivers/mmc/host/dw_mmc.h | 5
-rw-r--r-- drivers/mmc/host/mmci.c | 168
-rw-r--r-- drivers/mmc/host/mmci.h | 20
-rw-r--r-- drivers/mmc/host/moxart-mmc.c | 1
-rw-r--r-- drivers/mmc/host/mxs-mmc.c | 3
-rw-r--r-- drivers/mmc/host/omap_hsmmc.c | 283
-rw-r--r-- drivers/mmc/host/s3cmci.c | 186
-rw-r--r-- drivers/mmc/host/s3cmci.h | 4
-rw-r--r-- drivers/mmc/host/sdhci-acpi.c | 4
-rw-r--r-- drivers/mmc/host/sdhci-msm.c | 1
-rw-r--r-- drivers/mmc/host/sdhci-pci.c | 38
-rw-r--r-- drivers/mmc/host/sdhci-pci.h | 1
-rw-r--r-- drivers/mmc/host/sdhci-pxav3.c | 13
-rw-r--r-- drivers/mmc/host/sdhci-st.c | 176
-rw-r--r-- drivers/mmc/host/sdhci-tegra.c | 2
-rw-r--r-- drivers/mmc/host/sdhci.c | 144
-rw-r--r-- drivers/mmc/host/sh_mmcif.c | 96
-rw-r--r-- drivers/mmc/host/tmio_mmc_dma.c | 2
-rw-r--r-- drivers/mmc/host/wmt-sdmmc.c | 33
-rw-r--r-- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 7
-rw-r--r-- drivers/net/ethernet/broadcom/tg3.c | 3
-rw-r--r-- drivers/net/ethernet/emulex/benet/be.h | 1
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_main.c | 1
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_roce.c | 18
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_roce.h | 3
-rw-r--r-- drivers/net/ethernet/ibm/ehea/Makefile | 2
-rw-r--r-- drivers/net/ethernet/intel/e1000e/manage.c | 4
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_fcoe.c | 1
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_main.c | 18
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/fw.c | 91
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/main.c | 5
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/mr.c | 160
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 26
-rw-r--r-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 88
-rw-r--r-- drivers/net/ethernet/sun/sunvnet.c | 38
-rw-r--r-- drivers/net/ethernet/sun/sunvnet.h | 4
-rw-r--r-- drivers/net/ethernet/xilinx/ll_temac_main.c | 1
-rw-r--r-- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1
-rw-r--r-- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 1
-rw-r--r-- drivers/net/irda/donauboe.c | 15
-rw-r--r-- drivers/net/macvlan.c | 12
-rw-r--r-- drivers/net/wireless/ath/carl9170/carl9170.h | 1
-rw-r--r-- drivers/net/wireless/ath/carl9170/usb.c | 31
-rw-r--r-- drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c | 4
-rw-r--r-- drivers/net/wireless/brcm80211/brcmfmac/pcie.c | 3
-rw-r--r-- drivers/net/wireless/ipw2x00/ipw2200.c | 1
-rw-r--r-- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 3
-rw-r--r-- drivers/net/xen-netback/common.h | 5
-rw-r--r-- drivers/net/xen-netback/interface.c | 56
-rw-r--r-- drivers/net/xen-netback/netback.c | 26
-rw-r--r-- drivers/net/xen-netback/xenbus.c | 17
-rw-r--r-- drivers/of/Kconfig | 3
-rw-r--r-- drivers/of/Makefile | 4
-rw-r--r-- drivers/of/base.c | 451
-rw-r--r-- drivers/of/device.c | 4
-rw-r--r-- drivers/of/dynamic.c | 660
-rw-r--r-- drivers/of/fdt.c | 22
-rw-r--r-- drivers/of/of_private.h | 59
-rw-r--r-- drivers/of/of_reserved_mem.c | 70
-rw-r--r-- drivers/of/platform.c | 32
-rw-r--r-- drivers/of/selftest.c | 235
-rw-r--r-- drivers/of/testcase-data/testcases.dts | 15
-rw-r--r-- drivers/of/testcase-data/testcases.dtsi | 4
-rw-r--r-- drivers/pci/hotplug/rpaphp_core.c | 4
-rw-r--r-- drivers/scsi/cxgbi/cxgb3i/Kconfig | 2
-rw-r--r-- drivers/scsi/cxgbi/cxgb4i/Kconfig | 2
-rw-r--r-- drivers/scsi/scsi_transport_srp.c | 3
-rw-r--r-- drivers/tty/ehv_bytechan.c | 43
-rw-r--r-- drivers/tty/hvc/hvc_opal.c | 15
-rw-r--r-- drivers/tty/hvc/hvc_vio.c | 29
-rw-r--r-- drivers/tty/serial/pmac_zilog.c | 9
-rw-r--r-- drivers/tty/serial/serial_core.c | 3
-rw-r--r-- drivers/vfio/Kconfig | 6
-rw-r--r-- drivers/vfio/Makefile | 2
-rw-r--r-- drivers/vfio/pci/vfio_pci.c | 161
-rw-r--r-- drivers/vfio/pci/vfio_pci_private.h | 3
-rw-r--r-- drivers/vfio/vfio_spapr_eeh.c | 17
188 files changed, 7606 insertions, 3037 deletions
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8b450338075..4464e353c1e 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drbd-y += drbd_nla.o
+drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
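
[Editor's note] The Makefile hunk above links drbd_debugfs.o into the drbd module only when CONFIG_DEBUG_FS is enabled. The usual companion to this kbuild idiom is a header that declares the debugfs hooks for the CONFIG_DEBUG_FS=y case and turns them into empty static inlines otherwise, so callers elsewhere in the driver need no #ifdefs. drbd_debugfs.h (39 lines in the diffstat) is not reproduced in this section, so the sketch below is only an assumption of that pattern, not the actual file contents; the hook names are taken from the drbd_debugfs.c hunks further down.

/* Hypothetical sketch of drbd_debugfs.h, assuming the common
 * "real declaration vs. static inline stub" kbuild pattern. */
struct drbd_resource;

#ifdef CONFIG_DEBUG_FS
void drbd_debugfs_resource_add(struct drbd_resource *resource);
void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);
#else
static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }
#endif
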
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 05a1780ffa8..d26a3fa6368 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk {
__be32 context[AL_CONTEXT_PER_TRANSACTION];
};
-struct update_odbm_work {
- struct drbd_work w;
- struct drbd_device *device;
- unsigned int enr;
-};
-
-struct update_al_work {
- struct drbd_work w;
- struct drbd_device *device;
- struct completion event;
- int err;
-};
-
-
-void *drbd_md_get_buffer(struct drbd_device *device)
+void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
{
int r;
wait_event(device->misc_wait,
- (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
+ (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
device->state.disk <= D_FAILED);
- return r ? NULL : page_address(device->md_io_page);
+ if (r)
+ return NULL;
+
+ device->md_io.current_use = intent;
+ device->md_io.start_jif = jiffies;
+ device->md_io.submit_jif = device->md_io.start_jif - 1;
+ return page_address(device->md_io.page);
}
void drbd_md_put_buffer(struct drbd_device *device)
{
- if (atomic_dec_and_test(&device->md_io_in_use))
+ if (atomic_dec_and_test(&device->md_io.in_use))
wake_up(&device->misc_wait);
}
@@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
static int _drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev,
- struct page *page, sector_t sector,
- int rw, int size)
+ sector_t sector, int rw)
{
struct bio *bio;
+ /* we do all our meta data IO in aligned 4k blocks. */
+ const int size = 4096;
int err;
device->md_io.done = 0;
@@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
rw |= REQ_FUA | REQ_FLUSH;
- rw |= REQ_SYNC;
+ rw |= REQ_SYNC | REQ_NOIDLE;
bio = bio_alloc_drbd(GFP_NOIO);
bio->bi_bdev = bdev->md_bdev;
bio->bi_iter.bi_sector = sector;
err = -EIO;
- if (bio_add_page(bio, page, size, 0) != size)
+ if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
- bio->bi_private = &device->md_io;
+ bio->bi_private = device;
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;
@@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
}
bio_get(bio); /* one bio_put() is in the completion handler */
- atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
+ atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
+ device->md_io.submit_jif = jiffies;
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
bio_endio(bio, -EIO);
else
@@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
sector_t sector, int rw)
{
int err;
- struct page *iop = device->md_io_page;
-
- D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);
+ D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
BUG_ON(!bdev->md_bdev);
@@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
- /* we do all our meta data IO in aligned 4k blocks. */
- err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
+ err = _drbd_md_sync_page_io(device, bdev, sector, rw);
if (err) {
drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
@@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
return need_transaction;
}
-static int al_write_transaction(struct drbd_device *device, bool delegate);
-
-/* When called through generic_make_request(), we must delegate
- * activity log I/O to the worker thread: a further request
- * submitted via generic_make_request() within the same task
- * would be queued on current->bio_list, and would only start
- * after this function returns (see generic_make_request()).
- *
- * However, if we *are* the worker, we must not delegate to ourselves.
- */
+static int al_write_transaction(struct drbd_device *device);
-/*
- * @delegate: delegate activity log I/O to the worker thread
- */
-void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
+void drbd_al_begin_io_commit(struct drbd_device *device)
{
bool locked = false;
- BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
/* Serialize multiple transactions.
* This uses test_and_set_bit, memory barrier is implicit.
*/
@@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
rcu_read_unlock();
if (write_al_updates)
- al_write_transaction(device, delegate);
+ al_write_transaction(device);
spin_lock_irq(&device->al_lock);
/* FIXME
if (err)
@@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
/*
* @delegate: delegate activity log I/O to the worker thread
*/
-void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
+void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
{
- BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
if (drbd_al_begin_io_prepare(device, i))
- drbd_al_begin_io_commit(device, delegate);
+ drbd_al_begin_io_commit(device);
}
int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
@@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
/* We want all necessary updates for a given request within the same transaction
* We could first check how many updates are *actually* needed,
* and use that instead of the worst-case nr_al_extents */
- if (available_update_slots < nr_al_extents)
- return -EWOULDBLOCK;
+ if (available_update_slots < nr_al_extents) {
+ /* Too many activity log extents are currently "hot".
+ *
+ * If we have accumulated pending changes already,
+ * we made progress.
+ *
+ * If we cannot get even a single pending change through,
+ * stop the fast path until we made some progress,
+ * or requests to "cold" extents could be starved. */
+ if (!al->pending_changes)
+ __set_bit(__LC_STARVING, &device->act_log->flags);
+ return -ENOBUFS;
+ }
/* Is resync active in this area? */
for (enr = first; enr <= last; enr++) {
@@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr)
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}
-static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
-{
- return rs_enr >>
- /* bit to page */
- ((PAGE_SHIFT + 3) -
- /* resync extent number to bit */
- (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
-}
-
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
const unsigned int stripes = device->ldev->md.al_stripes;
@@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}
-static int
-_al_write_transaction(struct drbd_device *device)
+int al_write_transaction(struct drbd_device *device)
{
struct al_transaction_on_disk *buffer;
struct lc_element *e;
@@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device)
return -EIO;
}
- buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
+ /* protects md_io_buffer, al_tr_cycle, ... */
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer) {
drbd_err(device, "disk failed while waiting for md_io buffer\n");
put_ldev(device);
@@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device)
return err;
}
-
-static int w_al_write_transaction(struct drbd_work *w, int unused)
-{
- struct update_al_work *aw = container_of(w, struct update_al_work, w);
- struct drbd_device *device = aw->device;
- int err;
-
- err = _al_write_transaction(device);
- aw->err = err;
- complete(&aw->event);
-
- return err != -EIO ? err : 0;
-}
-
-/* Calls from worker context (see w_restart_disk_io()) need to write the
- transaction directly. Others came through generic_make_request(),
- those need to delegate it to the worker. */
-static int al_write_transaction(struct drbd_device *device, bool delegate)
-{
- if (delegate) {
- struct update_al_work al_work;
- init_completion(&al_work.event);
- al_work.w.cb = w_al_write_transaction;
- al_work.device = device;
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &al_work.w);
- wait_for_completion(&al_work.event);
- return al_work.err;
- } else
- return _al_write_transaction(device);
-}
-
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
int rv;
@@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
return 0;
}
-static int w_update_odbm(struct drbd_work *w, int unused)
-{
- struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
- struct drbd_device *device = udw->device;
- struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
-
- if (!get_ldev(device)) {
- if (__ratelimit(&drbd_ratelimit_state))
- drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
- kfree(udw);
- return 0;
- }
-
- drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
- put_ldev(device);
-
- kfree(udw);
-
- if (drbd_bm_total_weight(device) <= device->rs_failed) {
- switch (device->state.conn) {
- case C_SYNC_SOURCE: case C_SYNC_TARGET:
- case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
- drbd_resync_finished(device);
- default:
- /* nothing to do */
- break;
- }
- }
- drbd_bcast_event(device, &sib);
-
- return 0;
-}
-
+static const char *drbd_change_sync_fname[] = {
+ [RECORD_RS_FAILED] = "drbd_rs_failed_io",
+ [SET_IN_SYNC] = "drbd_set_in_sync",
+ [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
+};
/* ATTENTION. The AL's extents are 4MB each, while the extents in the
* resync LRU-cache are 16MB each.
* The caller of this function has to hold an get_ldev() reference.
*
+ * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
+ * potentially pulling in (and recounting the corresponding bits)
+ * this resync extent into the resync extent lru cache.
+ *
+ * Returns whether all bits have been cleared for this resync extent,
+ * precisely: (rs_left <= rs_failed)
+ *
* TODO will be obsoleted once we have a caching lru of the on disk bitmap
*/
-static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
- int count, int success)
+static bool update_rs_extent(struct drbd_device *device,
+ unsigned int enr, int count,
+ enum update_sync_bits_mode mode)
{
struct lc_element *e;
- struct update_odbm_work *udw;
-
- unsigned int enr;
D_ASSERT(device, atomic_read(&device->local_cnt));
- /* I simply assume that a sector/size pair never crosses
- * a 16 MB extent border. (Currently this is true...) */
- enr = BM_SECT_TO_EXT(sector);
-
- e = lc_get(device->resync, enr);
+ /* When setting out-of-sync bits,
+ * we don't need it cached (lc_find).
+ * But if it is present in the cache,
+ * we should update the cached bit count.
+ * Otherwise, that extent should be in the resync extent lru cache
+ * already -- or we want to pull it in if necessary -- (lc_get),
+ * then update and check rs_left and rs_failed. */
+ if (mode == SET_OUT_OF_SYNC)
+ e = lc_find(device->resync, enr);
+ else
+ e = lc_get(device->resync, enr);
if (e) {
struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
if (ext->lce.lc_number == enr) {
- if (success)
+ if (mode == SET_IN_SYNC)
ext->rs_left -= count;
+ else if (mode == SET_OUT_OF_SYNC)
+ ext->rs_left += count;
else
ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) {
- drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
+ drbd_warn(device, "BAD! enr=%u rs_left=%d "
"rs_failed=%d count=%d cstate=%s\n",
- (unsigned long long)sector,
ext->lce.lc_number, ext->rs_left,
ext->rs_failed, count,
drbd_conn_str(device->state.conn));
@@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
ext->lce.lc_number, ext->rs_failed);
}
ext->rs_left = rs_left;
- ext->rs_failed = success ? 0 : count;
+ ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
/* we don't keep a persistent log of the resync lru,
* we can commit any change right away. */
lc_committed(device->resync);
}
- lc_put(device->resync, &ext->lce);
+ if (mode != SET_OUT_OF_SYNC)
+ lc_put(device->resync, &ext->lce);
/* no race, we are within the al_lock! */
- if (ext->rs_left == ext->rs_failed) {
+ if (ext->rs_left <= ext->rs_failed) {
ext->rs_failed = 0;
-
- udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
- if (udw) {
- udw->enr = ext->lce.lc_number;
- udw->w.cb = w_update_odbm;
- udw->device = device;
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &udw->w);
- } else {
- drbd_warn(device, "Could not kmalloc an udw\n");
- }
+ return true;
}
- } else {
+ } else if (mode != SET_OUT_OF_SYNC) {
+ /* be quiet if lc_find() did not find it. */
drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
device->resync_locked,
device->resync->nr_elements,
device->resync->flags);
}
+ return false;
}
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
}
}
-/* clear the bit corresponding to the piece of storage in question:
- * size byte of data starting from sector. Only clear a bits of the affected
- * one ore more _aligned_ BM_BLOCK_SIZE blocks.
- *
- * called by worker on C_SYNC_TARGET and receiver on SyncSource.
- *
- */
-void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+/* It is called lazy update, so don't do write-out too often. */
+static bool lazy_bitmap_update_due(struct drbd_device *device)
{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count = 0;
- sector_t esector, nr_sectors;
- int wake_up = 0;
- unsigned long flags;
+ return time_after(jiffies, device->rs_last_bcast + 2*HZ);
+}
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
+static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
+{
+ if (rs_done)
+ set_bit(RS_DONE, &device->flags);
+ /* and also set RS_PROGRESS below */
+ else if (!lazy_bitmap_update_due(device))
return;
- }
-
- if (!get_ldev(device))
- return; /* no disk, no metadata, no bitmap to clear bits in */
-
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- goto out;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /* we clear it (in sync).
- * round up start sector, round down end sector. we make sure we only
- * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- goto out;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
- if (sbnr > ebnr)
- goto out;
+ drbd_device_post_work(device, RS_PROGRESS);
+}
+static int update_sync_bits(struct drbd_device *device,
+ unsigned long sbnr, unsigned long ebnr,
+ enum update_sync_bits_mode mode)
+{
/*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
+ * We keep a count of set bits per resync-extent in the ->rs_left
+ * caching member, so we need to loop and work within the resync extent
+ * alignment. Typically this loop will execute exactly once.
*/
- count = drbd_bm_clear_bits(device, sbnr, ebnr);
- if (count) {
- drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
- spin_lock_irqsave(&device->al_lock, flags);
- drbd_try_clear_on_disk_bm(device, sector, count, true);
- spin_unlock_irqrestore(&device->al_lock, flags);
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
+ unsigned long flags;
+ unsigned long count = 0;
+ unsigned int cleared = 0;
+ while (sbnr <= ebnr) {
+ /* set temporary boundary bit number to last bit number within
+ * the resync extent of the current start bit number,
+ * but cap at provided end bit number */
+ unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
+ unsigned long c;
+
+ if (mode == RECORD_RS_FAILED)
+ /* Only called from drbd_rs_failed_io(), bits
+ * supposedly still set. Recount, maybe some
+ * of the bits have been successfully cleared
+ * by application IO meanwhile.
+ */
+ c = drbd_bm_count_bits(device, sbnr, tbnr);
+ else if (mode == SET_IN_SYNC)
+ c = drbd_bm_clear_bits(device, sbnr, tbnr);
+ else /* if (mode == SET_OUT_OF_SYNC) */
+ c = drbd_bm_set_bits(device, sbnr, tbnr);
+
+ if (c) {
+ spin_lock_irqsave(&device->al_lock, flags);
+ cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
+ spin_unlock_irqrestore(&device->al_lock, flags);
+ count += c;
+ }
+ sbnr = tbnr + 1;
}
-out:
- put_ldev(device);
- if (wake_up)
+ if (count) {
+ if (mode == SET_IN_SYNC) {
+ unsigned long still_to_go = drbd_bm_total_weight(device);
+ bool rs_is_done = (still_to_go <= device->rs_failed);
+ drbd_advance_rs_marks(device, still_to_go);
+ if (cleared || rs_is_done)
+ maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
+ } else if (mode == RECORD_RS_FAILED)
+ device->rs_failed += count;
wake_up(&device->al_wait);
+ }
+ return count;
}
-/*
- * this is intended to set one request worth of data out of sync.
- * affects at least 1 bit,
- * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector. Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
*
- * called by tl_clear and drbd_send_dblock (==drbd_make_request).
- * so this can be _any_ process.
*/
-int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+ enum update_sync_bits_mode mode,
+ const char *file, const unsigned int line)
{
- unsigned long sbnr, ebnr, flags;
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count = 0;
sector_t esector, nr_sectors;
- unsigned int enr, count = 0;
- struct lc_element *e;
- /* this should be an empty REQ_FLUSH */
- if (size == 0)
+ /* This would be an empty REQ_FLUSH, be silent. */
+ if ((mode == SET_OUT_OF_SYNC) && size == 0)
return 0;
- if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "sector: %llus, size: %d\n",
- (unsigned long long)sector, size);
+ if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+ drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
+ drbd_change_sync_fname[mode],
+ (unsigned long long)sector, size);
return 0;
}
if (!get_ldev(device))
- return 0; /* no disk, no metadata, no bitmap to set bits in */
+ return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
nr_sectors = drbd_get_capacity(device->this_bdev);
esector = sector + (size >> 9) - 1;
@@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
if (!expect(esector < nr_sectors))
esector = nr_sectors - 1;
- /* we set it out of sync,
- * we do not need to round anything here */
- sbnr = BM_SECT_TO_BIT(sector);
- ebnr = BM_SECT_TO_BIT(esector);
-
- /* ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors. */
- spin_lock_irqsave(&device->al_lock, flags);
- count = drbd_bm_set_bits(device, sbnr, ebnr);
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
- enr = BM_SECT_TO_EXT(sector);
- e = lc_find(device->resync, enr);
- if (e)
- lc_entry(e, struct bm_extent, lce)->rs_left += count;
- spin_unlock_irqrestore(&device->al_lock, flags);
+ if (mode == SET_IN_SYNC) {
+ /* Round up start sector, round down end sector. We make sure
+ * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ goto out;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+ } else {
+ /* We set it out of sync, or record resync failure.
+ * Should not round anything here. */
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+ }
+ count = update_sync_bits(device, sbnr, ebnr, mode);
out:
put_ldev(device);
-
return count;
}
@@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
struct lc_element *e;
struct bm_extent *bm_ext;
int i;
+ bool throttle = drbd_rs_should_slow_down(device, sector, true);
+
+ /* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
+ * not yet BME_LOCKED) extent needs to be kicked out explicitly if we
+ * need to throttle. There is at most one such half-locked extent,
+ * which is remembered in resync_wenr. */
+
+ if (throttle && device->resync_wenr != enr)
+ return -EAGAIN;
spin_lock_irq(&device->al_lock);
if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
clear_bit(BME_NO_WRITES, &bm_ext->flags);
device->resync_wenr = LC_FREE;
- if (lc_put(device->resync, &bm_ext->lce) == 0)
+ if (lc_put(device->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0;
device->resync_locked--;
+ }
wake_up(&device->al_wait);
} else {
drbd_alert(device, "LOGIC BUG\n");
@@ -1161,8 +1097,20 @@ proceed:
return 0;
try_again:
- if (bm_ext)
- device->resync_wenr = enr;
+ if (bm_ext) {
+ if (throttle) {
+ D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ device->resync_wenr = LC_FREE;
+ if (lc_put(device->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0;
+ device->resync_locked--;
+ }
+ wake_up(&device->al_wait);
+ } else
+ device->resync_wenr = enr;
+ }
spin_unlock_irq(&device->al_lock);
return -EAGAIN;
}
@@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device)
return 0;
}
-
-/**
- * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
- * @device: DRBD device.
- * @sector: The sector number.
- * @size: Size of failed IO operation, in byte.
- */
-void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
-{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count;
- sector_t esector, nr_sectors;
- int wake_up = 0;
-
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
- return;
- }
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- return;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /*
- * round up start sector, round down end sector. we make sure we only
- * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- return;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
-
- if (sbnr > ebnr)
- return;
-
- /*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
- */
- spin_lock_irq(&device->al_lock);
- count = drbd_bm_count_bits(device, sbnr, ebnr);
- if (count) {
- device->rs_failed += count;
-
- if (get_ldev(device)) {
- drbd_try_clear_on_disk_bm(device, sector, count, false);
- put_ldev(device);
- }
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
- }
- spin_unlock_irq(&device->al_lock);
- if (wake_up)
- wake_up(&device->al_wait);
-}
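
[Editor's note] The activity-log rework above folds drbd_set_in_sync(), drbd_set_out_of_sync() and drbd_rs_failed_io() into one __drbd_change_sync()/update_sync_bits() path. Because the driver caches per-extent counters (rs_left, rs_failed), update_sync_bits() walks the requested bit range in chunks that never cross a resync-extent boundary: each chunk ends at sbnr | BM_BLOCKS_PER_BM_EXT_MASK, capped at ebnr. A minimal userspace sketch of that boundary arithmetic follows; the mask value is assumed purely for illustration, the real constant is derived in drbd_int.h.

#include <stdio.h>

/* assumed for illustration: 64 bitmap blocks per resync extent */
#define BLOCKS_PER_EXT_MASK 0x3fUL

int main(void)
{
	unsigned long sbnr = 60, ebnr = 200;	/* arbitrary example bit range */

	while (sbnr <= ebnr) {
		/* last bit within the current extent, capped at the end of the range */
		unsigned long ext_end = sbnr | BLOCKS_PER_EXT_MASK;
		unsigned long tbnr = ebnr < ext_end ? ebnr : ext_end;

		printf("chunk: bits %lu..%lu (resync extent %lu)\n",
		       sbnr, tbnr, sbnr / (BLOCKS_PER_EXT_MASK + 1));
		sbnr = tbnr + 1;
	}
	return 0;
}

With this mask the example range 60..200 is split into 60..63, 64..127, 128..191 and 192..200, i.e. one chunk per touched resync extent, which is what lets update_rs_extent() adjust its per-extent counters once per extent.
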
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8fdfe..426c97aef90 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -22,6 +22,8 @@
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
@@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number)
for (i = 0; i < number; i++) {
if (!pages[i]) {
- printk(KERN_ALERT "drbd: bm_free_pages tried to free "
- "a NULL pointer; i=%lu n=%lu\n",
- i, number);
+ pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
+ i, number);
continue;
}
__free_page(pages[i]);
@@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
end = offset + len;
if (end > b->bm_words) {
- printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+ pr_alert("bm_memset end > bm_words\n");
return;
}
@@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
p_addr = bm_map_pidx(b, idx);
bm = p_addr + MLPP(offset);
if (bm+do_now > p_addr + LWPP) {
- printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+ pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
p_addr, bm, (int)do_now);
} else
memset(bm, c, do_now * sizeof(long));
@@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device)
spin_unlock_irq(&b->bm_lock);
}
-struct bm_aio_ctx {
- struct drbd_device *device;
- atomic_t in_flight;
- unsigned int done;
- unsigned flags;
-#define BM_AIO_COPY_PAGES 1
-#define BM_AIO_WRITE_HINTED 2
-#define BM_WRITE_ALL_PAGES 4
- int error;
- struct kref kref;
-};
-
-static void bm_aio_ctx_destroy(struct kref *kref)
+static void drbd_bm_aio_ctx_destroy(struct kref *kref)
{
- struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+ struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
+ unsigned long flags;
+ spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
+ list_del(&ctx->list);
+ spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
put_ldev(ctx->device);
kfree(ctx);
}
@@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref)
/* bv_page may be a copy, or may be the original */
static void bm_async_io_complete(struct bio *bio, int error)
{
- struct bm_aio_ctx *ctx = bio->bi_private;
+ struct drbd_bm_aio_ctx *ctx = bio->bi_private;
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
@@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error)
if (atomic_dec_and_test(&ctx->in_flight)) {
ctx->done = 1;
wake_up(&device->misc_wait);
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
}
}
-static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
+static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
struct bio *bio = bio_alloc_drbd(GFP_NOIO);
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
+ unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE;
sector_t on_disk_sector =
device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
-static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
+static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
- struct bm_aio_ctx *ctx;
+ struct drbd_bm_aio_ctx *ctx;
struct drbd_bitmap *b = device->bitmap;
int num_pages, i, count = 0;
unsigned long now;
@@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
* as we submit copies of pages anyways.
*/
- ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+ ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
if (!ctx)
return -ENOMEM;
- *ctx = (struct bm_aio_ctx) {
+ *ctx = (struct drbd_bm_aio_ctx) {
.device = device,
+ .start_jif = jiffies,
.in_flight = ATOMIC_INIT(1),
.done = 0,
.flags = flags,
@@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
.kref = { ATOMIC_INIT(2) },
};
- if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
+ if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
kfree(ctx);
return -ENODEV;
}
+ /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
+ drbd_adm_attach(), after device->ldev was assigned. */
- if (!ctx->flags)
+ if (0 == (ctx->flags & ~BM_AIO_READ))
WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&ctx->list, &device->pending_bitmap_io);
+ spin_unlock_irq(&device->resource->req_lock);
+
num_pages = b->bm_number_of_pages;
now = jiffies;
@@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
/* ignore completely unchanged pages */
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
- if (rw & WRITE) {
+ if (!(flags & BM_AIO_READ)) {
if ((flags & BM_AIO_WRITE_HINTED) &&
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;
- if (!(flags & BM_WRITE_ALL_PAGES) &&
+ if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
continue;
@@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
}
}
atomic_inc(&ctx->in_flight);
- bm_page_io_async(ctx, i, rw);
+ bm_page_io_async(ctx, i);
++count;
cond_resched();
}
@@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
if (!atomic_dec_and_test(&ctx->in_flight))
wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
else
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
/* summary for global bitmap IO */
if (flags == 0)
drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
- rw == WRITE ? "WRITE" : "READ",
+ (flags & BM_AIO_READ) ? "READ" : "WRITE",
count, jiffies - now);
if (ctx->error) {
@@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
err = -EIO; /* Disk timeout/force-detach during IO... */
now = jiffies;
- if (rw == WRITE) {
- drbd_md_flush(device);
- } else /* rw == READ */ {
+ if (flags & BM_AIO_READ) {
b->bm_set = bm_count_bits(b);
drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
jiffies - now);
}
now = b->bm_set;
- if (flags == 0)
+ if ((flags & ~BM_AIO_READ) == 0)
drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
return err;
}
@@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
*/
int drbd_bm_read(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, READ, 0, 0);
+ return bm_rw(device, BM_AIO_READ, 0);
}
/**
@@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, 0, 0);
+ return bm_rw(device, 0, 0);
}
/**
@@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0);
+ return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
+}
+
+/**
+ * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
+ * @device: DRBD device.
+ * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
+ */
+int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
+{
+ return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
}
/**
@@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0);
+ return bm_rw(device, BM_AIO_COPY_PAGES, 0);
}
/**
@@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
*/
int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
-}
-
-/**
- * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
- * @device: DRBD device.
- * @idx: bitmap page index
- *
- * We don't want to special case on logical_block_size of the backend device,
- * so we submit PAGE_SIZE aligned pieces.
- * Note that on "most" systems, PAGE_SIZE is 4k.
- *
- * In case this becomes an issue on systems with larger PAGE_SIZE,
- * we may want to change this again to write 4k aligned 4k pieces.
- */
-int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
-{
- struct bm_aio_ctx *ctx;
- int err;
-
- if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
- dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
- return 0;
- }
-
- ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
- if (!ctx)
- return -ENOMEM;
-
- *ctx = (struct bm_aio_ctx) {
- .device = device,
- .in_flight = ATOMIC_INIT(1),
- .done = 0,
- .flags = BM_AIO_COPY_PAGES,
- .error = 0,
- .kref = { ATOMIC_INIT(2) },
- };
-
- if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
- drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
- kfree(ctx);
- return -ENODEV;
- }
-
- bm_page_io_async(ctx, idx, WRITE_SYNC);
- wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
-
- if (ctx->error)
- drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
- /* that causes us to detach, so the in memory bitmap will be
- * gone in a moment as well. */
-
- device->bm_writ_cnt++;
- err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
- kref_put(&ctx->kref, &bm_aio_ctx_destroy);
- return err;
+ return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
}
/* NOTE
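
[Editor's note] Besides folding the separate rw argument into the BM_AIO_* flag bits, bm_rw() above now links each drbd_bm_aio_ctx onto device->pending_bitmap_io under the resource req_lock and unlinks it in drbd_bm_aio_ctx_destroy(); that is what lets the new debugfs code in the next file report the oldest in-flight bitmap I/O. A simplified kernel-style sketch of that register/observe/unregister pattern is given below; the type and function names are abbreviations chosen for illustration, not the DRBD ones.

/* Simplified sketch of the pending-I/O bookkeeping introduced above. */
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>

struct pending_io {
	struct list_head list;
	unsigned long start_jif;
};

static LIST_HEAD(pending_list);
static DEFINE_SPINLOCK(pending_lock);

/* submitter side: make the in-flight context visible before starting I/O */
static void io_register(struct pending_io *io)
{
	io->start_jif = jiffies;
	spin_lock_irq(&pending_lock);
	list_add_tail(&io->list, &pending_list);
	spin_unlock_irq(&pending_lock);
}

/* teardown side (the kref release function in the real code): unlink again */
static void io_unregister(struct pending_io *io)
{
	spin_lock_irq(&pending_lock);
	list_del(&io->list);
	spin_unlock_irq(&pending_lock);
}

/* observer side (a debugfs show function): peek at the oldest entry */
static unsigned long oldest_io_age(void)
{
	struct pending_io *io;
	unsigned long age = 0;

	spin_lock_irq(&pending_lock);
	io = list_first_entry_or_null(&pending_list, struct pending_io, list);
	if (io)
		age = jiffies - io->start_jif;
	spin_unlock_irq(&pending_lock);
	return age;
}

Taking the same lock on the register, unregister and observe paths is what keeps the snapshot in the drbd_debugfs.c hunks below from racing with a context being freed.
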
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
new file mode 100644
index 00000000000..5c20b18540b
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -0,0 +1,958 @@
+#define pr_fmt(fmt) "drbd debugfs: " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+#include "drbd_debugfs.h"
+
+
+/**********************************************************************
+ * Whenever you change the file format, remember to bump the version. *
+ **********************************************************************/
+
+static struct dentry *drbd_debugfs_root;
+static struct dentry *drbd_debugfs_version;
+static struct dentry *drbd_debugfs_resources;
+static struct dentry *drbd_debugfs_minors;
+
+static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
+{
+ if (valid)
+ seq_printf(m, "\t%d", jiffies_to_msecs(dt));
+ else
+ seq_printf(m, "\t-");
+}
+
+static void __seq_print_rq_state_bit(struct seq_file *m,
+ bool is_set, char *sep, const char *set_name, const char *unset_name)
+{
+ if (is_set && set_name) {
+ seq_putc(m, *sep);
+ seq_puts(m, set_name);
+ *sep = '|';
+ } else if (!is_set && unset_name) {
+ seq_putc(m, *sep);
+ seq_puts(m, unset_name);
+ *sep = '|';
+ }
+}
+
+static void seq_print_rq_state_bit(struct seq_file *m,
+ bool is_set, char *sep, const char *set_name)
+{
+ __seq_print_rq_state_bit(m, is_set, sep, set_name, NULL);
+}
+
+/* pretty print enum drbd_req_state_bits req->rq_state */
+static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
+{
+ unsigned int s = req->rq_state;
+ char sep = ' ';
+ seq_printf(m, "\t0x%08x", s);
+ seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
+
+ /* RQ_WRITE ignored, already reported */
+ seq_puts(m, "\tlocal:");
+ seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
+ seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
+ seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
+ seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
+ if (sep == ' ')
+ seq_puts(m, " -");
+
+ /* for_each_connection ... */
+ seq_printf(m, "\tnet:");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
+ seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
+ seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
+ seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
+ seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
+ seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
+ if (sep == ' ')
+ seq_puts(m, " -");
+
+ seq_printf(m, " :");
+ sep = ' ';
+ seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
+ seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
+ seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
+ if (sep == ' ')
+ seq_puts(m, " -");
+ seq_printf(m, "\n");
+}
+
+static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+ /* change anything here, fixup header below! */
+ unsigned int s = req->rq_state;
+
+#define RQ_HDR_1 "epoch\tsector\tsize\trw"
+ seq_printf(m, "0x%x\t%llu\t%u\t%s",
+ req->epoch,
+ (unsigned long long)req->i.sector, req->i.size >> 9,
+ (s & RQ_WRITE) ? "W" : "R");
+
+#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
+ seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
+ seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
+ seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
+
+#define RQ_HDR_3 "\tsent\tacked\tdone"
+ seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
+ seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
+ seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
+
+#define RQ_HDR_4 "\tstate\n"
+ seq_print_request_state(m, req);
+}
+#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
+
+static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+ seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
+ seq_print_one_request(m, req, now);
+}
+
+static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ struct drbd_md_io tmp;
+ /* In theory this is racy,
+ * in the sense that there could have been a
+ * drbd_md_put_buffer(); drbd_md_get_buffer();
+ * between accessing these members here. */
+ tmp = device->md_io;
+ if (atomic_read(&tmp.in_use)) {
+ seq_printf(m, "%u\t%u\t%d\t",
+ device->minor, device->vnr,
+ jiffies_to_msecs(now - tmp.start_jif));
+ if (time_before(tmp.submit_jif, tmp.start_jif))
+ seq_puts(m, "-\t");
+ else
+ seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif));
+ seq_printf(m, "%s\n", tmp.current_use);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\tage\t#waiting\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ unsigned long jif;
+ struct drbd_request *req;
+ int n = atomic_read(&device->ap_actlog_cnt);
+ if (n) {
+ spin_lock_irq(&device->resource->req_lock);
+ req = list_first_entry_or_null(&device->pending_master_completion[1],
+ struct drbd_request, req_pending_master_completion);
+ /* if the oldest request does not wait for the activity log
+ * it is not interesting for us here */
+ if (req && !(req->rq_state & RQ_IN_ACT_LOG))
+ jif = req->start_jif;
+ else
+ req = NULL;
+ spin_unlock_irq(&device->resource->req_lock);
+ }
+ if (n) {
+ seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+ if (req)
+ seq_printf(m, "%u\t", jiffies_to_msecs(now - jif));
+ else
+ seq_puts(m, "-\t");
+ seq_printf(m, "%u\n", n);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now)
+{
+ struct drbd_bm_aio_ctx *ctx;
+ unsigned long start_jif;
+ unsigned int in_flight;
+ unsigned int flags;
+ spin_lock_irq(&device->resource->req_lock);
+ ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list);
+ if (ctx && ctx->done)
+ ctx = NULL;
+ if (ctx) {
+ start_jif = ctx->start_jif;
+ in_flight = atomic_read(&ctx->in_flight);
+ flags = ctx->flags;
+ }
+ spin_unlock_irq(&device->resource->req_lock);
+ if (ctx) {
+ seq_printf(m, "%u\t%u\t%c\t%u\t%u\n",
+ device->minor, device->vnr,
+ (flags & BM_AIO_READ) ? 'R' : 'W',
+ jiffies_to_msecs(now - start_jif),
+ in_flight);
+ }
+}
+
+static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n");
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ seq_print_device_bitmap_io(m, device, now);
+ }
+ rcu_read_unlock();
+}
+
+/* pretty print enum peer_req->flags */
+static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req)
+{
+ unsigned long f = peer_req->flags;
+ char sep = ' ';
+
+ __seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing");
+ __seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal");
+ seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
+ seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
+ seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
+
+ if (f & EE_IS_TRIM) {
+ seq_putc(m, sep);
+ sep = '|';
+ if (f & EE_IS_TRIM_USE_ZEROOUT)
+ seq_puts(m, "zero-out");
+ else
+ seq_puts(m, "trim");
+ }
+ seq_putc(m, '\n');
+}
+
+static void seq_print_peer_request(struct seq_file *m,
+ struct drbd_device *device, struct list_head *lh,
+ unsigned long now)
+{
+ bool reported_preparing = false;
+ struct drbd_peer_request *peer_req;
+ list_for_each_entry(peer_req, lh, w.list) {
+ if (reported_preparing && !(peer_req->flags & EE_SUBMITTED))
+ continue;
+
+ if (device)
+ seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+
+ seq_printf(m, "%llu\t%u\t%c\t%u\t",
+ (unsigned long long)peer_req->i.sector, peer_req->i.size >> 9,
+ (peer_req->flags & EE_WRITE) ? 'W' : 'R',
+ jiffies_to_msecs(now - peer_req->submit_jif));
+ seq_print_peer_request_flags(m, peer_req);
+ if (peer_req->flags & EE_SUBMITTED)
+ break;
+ else
+ reported_preparing = true;
+ }
+}
+
+static void seq_print_device_peer_requests(struct seq_file *m,
+ struct drbd_device *device, unsigned long now)
+{
+ seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n");
+ spin_lock_irq(&device->resource->req_lock);
+ seq_print_peer_request(m, device, &device->active_ee, now);
+ seq_print_peer_request(m, device, &device->read_ee, now);
+ seq_print_peer_request(m, device, &device->sync_ee, now);
+ spin_unlock_irq(&device->resource->req_lock);
+ if (test_bit(FLUSH_PENDING, &device->flags)) {
+ seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n",
+ device->minor, device->vnr,
+ jiffies_to_msecs(now - device->flush_jif));
+ }
+}
+
+static void seq_print_resource_pending_peer_requests(struct seq_file *m,
+ struct drbd_resource *resource, unsigned long now)
+{
+ struct drbd_device *device;
+ unsigned int i;
+
+ rcu_read_lock();
+ idr_for_each_entry(&resource->devices, device, i) {
+ seq_print_device_peer_requests(m, device, now);
+ }
+ rcu_read_unlock();
+}
+
+static void seq_print_resource_transfer_log_summary(struct seq_file *m,
+ struct drbd_resource *resource,
+ struct drbd_connection *connection,
+ unsigned long now)
+{
+ struct drbd_request *req;
+ unsigned int count = 0;
+ unsigned int show_state = 0;
+
+ seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
+ spin_lock_irq(&resource->req_lock);
+ list_for_each_entry(req, &connection->transfer_log, tl_requests) {
+ unsigned int tmp = 0;
+ unsigned int s;
+ ++count;
+
+ /* don't disable irq "forever" */
+ if (!(count & 0x1ff)) {
+ struct drbd_request *req_next;
+ kref_get(&req->kref);
+ spin_unlock_irq(&resource->req_lock);
+ cond_resched();
+ spin_lock_irq(&resource->req_lock);
+ req_next = list_next_entry(req, tl_requests);
+ if (kref_put(&req->kref, drbd_req_destroy))
+ req = req_next;
+ if (&req->tl_requests == &connection->transfer_log)
+ break;
+ }
+
+ s = req->rq_state;
+
+ /* This is meant to summarize timing issues, to be able to tell
+ * local disk problems from network problems.
+ * Skip requests, if we have shown an even older request with
+ * similar aspects already. */
+ if (req->master_bio == NULL)
+ tmp |= 1;
+ if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
+ tmp |= 2;
+ if (s & RQ_NET_MASK) {
+ if (!(s & RQ_NET_SENT))
+ tmp |= 4;
+ if (s & RQ_NET_PENDING)
+ tmp |= 8;
+ if (!(s & RQ_NET_DONE))
+ tmp |= 16;
+ }
+ if ((tmp & show_state) == tmp)
+ continue;
+ show_state |= tmp;
+ seq_printf(m, "%u\t", count);
+ seq_print_minor_vnr_req(m, req, now);
+ if (show_state == 0x1f)
+ break;
+ }
+ spin_unlock_irq(&resource->req_lock);
+}
+
+/* TODO: transfer_log and friends should be moved to resource */
+static int in_flight_summary_show(struct seq_file *m, void *pos)
+{
+ struct drbd_resource *resource = m->private;
+ struct drbd_connection *connection;
+ unsigned long jif = jiffies;
+
+ connection = first_connection(resource);
+ /* This does not happen, actually.
+ * But be robust and prepare for future code changes. */
+ if (!connection || !kref_get_unless_zero(&connection->kref))
+ return -ESTALE;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, "oldest bitmap IO\n");
+ seq_print_resource_pending_bitmap_io(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "meta data IO\n");
+ seq_print_resource_pending_meta_io(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "socket buffer stats\n");
+ /* for each connection ... once we have more than one */
+ rcu_read_lock();
+ if (connection->data.socket) {
+ /* open coded SIOCINQ, the "relevant" part */
+ struct tcp_sock *tp = tcp_sk(connection->data.socket->sk);
+ int answ = tp->rcv_nxt - tp->copied_seq;
+ seq_printf(m, "unread receive buffer: %u Byte\n", answ);
+ /* open coded SIOCOUTQ, the "relevant" part */
+ answ = tp->write_seq - tp->snd_una;
+ seq_printf(m, "unacked send buffer: %u Byte\n", answ);
+ }
+ rcu_read_unlock();
+ seq_putc(m, '\n');
+
+ seq_puts(m, "oldest peer requests\n");
+ seq_print_resource_pending_peer_requests(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "application requests waiting for activity log\n");
+ seq_print_waiting_for_AL(m, resource, jif);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "oldest application requests\n");
+ seq_print_resource_transfer_log_summary(m, resource, connection, jif);
+ seq_putc(m, '\n');
+
+ jif = jiffies - jif;
+ if (jif)
+ seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return 0;
+}
+
+/* simple_positive(file->f_dentry) respectively debugfs_positive(),
+ * but neither is "reachable" from here.
+ * So we have our own inline version of it here. :-( */
+static inline int debugfs_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/* make sure at *open* time that the respective object won't go away. */
+static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
+ void *data, struct kref *kref,
+ void (*release)(struct kref *))
+{
+ struct dentry *parent;
+ int ret = -ESTALE;
+
+ /* Are we still linked,
+ * or has debugfs_remove() already been called? */
+ parent = file->f_dentry->d_parent;
+ /* not sure if this can happen: */
+ if (!parent || !parent->d_inode)
+ goto out;
+ /* serialize with d_delete() */
+ mutex_lock(&parent->d_inode->i_mutex);
+ /* Make sure the object is still alive */
+ if (debugfs_positive(file->f_dentry)
+ && kref_get_unless_zero(kref))
+ ret = 0;
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (!ret) {
+ ret = single_open(file, show, data);
+ if (ret)
+ kref_put(kref, release);
+ }
+out:
+ return ret;
+}
+
+static int in_flight_summary_open(struct inode *inode, struct file *file)
+{
+ struct drbd_resource *resource = inode->i_private;
+ return drbd_single_open(file, in_flight_summary_show, resource,
+ &resource->kref, drbd_destroy_resource);
+}
+
+static int in_flight_summary_release(struct inode *inode, struct file *file)
+{
+ struct drbd_resource *resource = inode->i_private;
+ kref_put(&resource->kref, drbd_destroy_resource);
+ return single_release(inode, file);
+}
+
+static const struct file_operations in_flight_summary_fops = {
+ .owner = THIS_MODULE,
+ .open = in_flight_summary_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = in_flight_summary_release,
+};
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource)
+{
+ struct dentry *dentry;
+ if (!drbd_debugfs_resources)
+ return;
+
+ dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res = dentry;
+
+ dentry = debugfs_create_dir("volumes", resource->debugfs_res);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_volumes = dentry;
+
+ dentry = debugfs_create_dir("connections", resource->debugfs_res);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_connections = dentry;
+
+ dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
+ resource->debugfs_res, resource,
+ &in_flight_summary_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ resource->debugfs_res_in_flight_summary = dentry;
+ return;
+
+fail:
+ drbd_debugfs_resource_cleanup(resource);
+ drbd_err(resource, "failed to create debugfs dentry\n");
+}
+
+static void drbd_debugfs_remove(struct dentry **dp)
+{
+ debugfs_remove(*dp);
+ *dp = NULL;
+}
+
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource)
+{
+ /* it is ok to call debugfs_remove(NULL) */
+ drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary);
+ drbd_debugfs_remove(&resource->debugfs_res_connections);
+ drbd_debugfs_remove(&resource->debugfs_res_volumes);
+ drbd_debugfs_remove(&resource->debugfs_res);
+}
+
+static void seq_print_one_timing_detail(struct seq_file *m,
+ const struct drbd_thread_timing_details *tdp,
+ unsigned long now)
+{
+ struct drbd_thread_timing_details td;
+ /* No locking...
+ * use temporary assignment to get at consistent data. */
+ do {
+ td = *tdp;
+ } while (td.cb_nr != tdp->cb_nr);
+ if (!td.cb_addr)
+ return;
+ seq_printf(m, "%u\t%d\t%s:%u\t%ps\n",
+ td.cb_nr,
+ jiffies_to_msecs(now - td.start_jif),
+ td.caller_fn, td.line,
+ td.cb_addr);
+}
+
+static void seq_print_timing_details(struct seq_file *m,
+ const char *title,
+ unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now)
+{
+ unsigned int start_idx;
+ unsigned int i;
+
+ seq_printf(m, "%s\n", title);
+ /* If not much is going on, this will result in natural ordering.
+ * If it is very busy, we will possibly skip events, or even see wrap
+ * arounds, which could only be avoided with locking.
+ */
+ start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST;
+ for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++)
+ seq_print_one_timing_detail(m, tdp+i, now);
+ for (i = 0; i < start_idx; i++)
+ seq_print_one_timing_detail(m, tdp+i, now);
+}
+
+static int callback_history_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_connection *connection = m->private;
+ unsigned long jif = jiffies;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, "n\tage\tcallsite\tfn\n");
+ seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif);
+ seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif);
+ return 0;
+}
+
+static int callback_history_open(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ return drbd_single_open(file, callback_history_show, connection,
+ &connection->kref, drbd_destroy_connection);
+}
+
+static int callback_history_release(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return single_release(inode, file);
+}
+
+static const struct file_operations connection_callback_history_fops = {
+ .owner = THIS_MODULE,
+ .open = callback_history_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = callback_history_release,
+};
+
+static int connection_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_connection *connection = m->private;
+ unsigned long now = jiffies;
+ struct drbd_request *r1, *r2;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
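+ /* oldest request per category (next to send, ack pending, not yet net-done); coinciding requests are printed only once */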
+ spin_lock_irq(&connection->resource->req_lock);
+ r1 = connection->req_next;
+ if (r1)
+ seq_print_minor_vnr_req(m, r1, now);
+ r2 = connection->req_ack_pending;
+ if (r2 && r2 != r1) {
+ r1 = r2;
+ seq_print_minor_vnr_req(m, r1, now);
+ }
+ r2 = connection->req_not_net_done;
+ if (r2 && r2 != r1)
+ seq_print_minor_vnr_req(m, r2, now);
+ spin_unlock_irq(&connection->resource->req_lock);
+ return 0;
+}
+
+static int connection_oldest_requests_open(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ return drbd_single_open(file, connection_oldest_requests_show, connection,
+ &connection->kref, drbd_destroy_connection);
+}
+
+static int connection_oldest_requests_release(struct inode *inode, struct file *file)
+{
+ struct drbd_connection *connection = inode->i_private;
+ kref_put(&connection->kref, drbd_destroy_connection);
+ return single_release(inode, file);
+}
+
+static const struct file_operations connection_oldest_requests_fops = {
+ .owner = THIS_MODULE,
+ .open = connection_oldest_requests_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = connection_oldest_requests_release,
+};
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection)
+{
+ struct dentry *conns_dir = connection->resource->debugfs_res_connections;
+ struct dentry *dentry;
+ if (!conns_dir)
+ return;
+
+ /* Once we enable multiple peers,
+ * these connections will have descriptive names.
+ * For now, it is just the one connection to the (only) "peer". */
+ dentry = debugfs_create_dir("peer", conns_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn = dentry;
+
+ dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
+ connection->debugfs_conn, connection,
+ &connection_callback_history_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn_callback_history = dentry;
+
+ dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
+ connection->debugfs_conn, connection,
+ &connection_oldest_requests_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ connection->debugfs_conn_oldest_requests = dentry;
+ return;
+
+fail:
+ drbd_debugfs_connection_cleanup(connection);
+ drbd_err(connection, "failed to create debugfs dentry\n");
+}
+
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection)
+{
+ drbd_debugfs_remove(&connection->debugfs_conn_callback_history);
+ drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests);
+ drbd_debugfs_remove(&connection->debugfs_conn);
+}
+
+static void resync_dump_detail(struct seq_file *m, struct lc_element *e)
+{
+ struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+ seq_printf(m, "%5d %s %s %s\n", bme->rs_left,
+ test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------",
+ test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------",
+ test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------"
+ );
+}
+
+static int device_resync_extents_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ if (get_ldev_if_state(device, D_FAILED)) {
+ lc_seq_printf_stats(m, device->resync);
+ lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail);
+ put_ldev(device);
+ }
+ return 0;
+}
+
+static int device_act_log_extents_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ if (get_ldev_if_state(device, D_FAILED)) {
+ lc_seq_printf_stats(m, device->act_log);
+ lc_seq_dump_details(m, device->act_log, "", NULL);
+ put_ldev(device);
+ }
+ return 0;
+}
+
+static int device_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+ struct drbd_resource *resource = device->resource;
+ unsigned long now = jiffies;
+ struct drbd_request *r1, *r2;
+ int i;
+
+ /* BUMP me if you change the file format/content/presentation */
+ seq_printf(m, "v: %u\n\n", 0);
+
+ seq_puts(m, RQ_HDR);
+ spin_lock_irq(&resource->req_lock);
+ /* WRITE, then READ */
+ for (i = 1; i >= 0; --i) {
+ r1 = list_first_entry_or_null(&device->pending_master_completion[i],
+ struct drbd_request, req_pending_master_completion);
+ r2 = list_first_entry_or_null(&device->pending_completion[i],
+ struct drbd_request, req_pending_local);
+ if (r1)
+ seq_print_one_request(m, r1, now);
+ if (r2 && r2 != r1)
+ seq_print_one_request(m, r2, now);
+ }
+ spin_unlock_irq(&resource->req_lock);
+ return 0;
+}
+
+static int device_data_gen_id_show(struct seq_file *m, void *ignored)
+{
+ struct drbd_device *device = m->private;
+ struct drbd_md *md;
+ enum drbd_uuid_index idx;
+
+ if (!get_ldev_if_state(device, D_FAILED))
+ return -ENODEV;
+
+ md = &device->ldev->md;
+ spin_lock_irq(&md->uuid_lock);
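+ /* current, bitmap and history UUIDs of the on-disk meta data */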
+ for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) {
+ seq_printf(m, "0x%016llX\n", md->uuid[idx]);
+ }
+ spin_unlock_irq(&md->uuid_lock);
+ put_ldev(device);
+ return 0;
+}
+
+#define drbd_debugfs_device_attr(name) \
+static int device_ ## name ## _open(struct inode *inode, struct file *file) \
+{ \
+ struct drbd_device *device = inode->i_private; \
+ return drbd_single_open(file, device_ ## name ## _show, device, \
+ &device->kref, drbd_destroy_device); \
+} \
+static int device_ ## name ## _release(struct inode *inode, struct file *file) \
+{ \
+ struct drbd_device *device = inode->i_private; \
+ kref_put(&device->kref, drbd_destroy_device); \
+ return single_release(inode, file); \
+} \
+static const struct file_operations device_ ## name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = device_ ## name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = device_ ## name ## _release, \
+};
+
+drbd_debugfs_device_attr(oldest_requests)
+drbd_debugfs_device_attr(act_log_extents)
+drbd_debugfs_device_attr(resync_extents)
+drbd_debugfs_device_attr(data_gen_id)
+
+void drbd_debugfs_device_add(struct drbd_device *device)
+{
+ struct dentry *vols_dir = device->resource->debugfs_res_volumes;
+ char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */
+ char vnr_buf[8]; /* volume number vnr is only 16 bit anyway; */
+ char *slink_name = NULL;
+
+ struct dentry *dentry;
+ if (!vols_dir || !drbd_debugfs_minors)
+ return;
+
+ snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr);
+ dentry = debugfs_create_dir(vnr_buf, vols_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ device->debugfs_vol = dentry;
+
+ snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor);
+ slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u",
+ device->resource->name, device->vnr);
+ if (!slink_name)
+ goto fail;
+ dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name);
+ kfree(slink_name);
+ slink_name = NULL;
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ device->debugfs_minor = dentry;
+
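+/* shorthand: create one read-only debugfs file per attribute in this volume's directory */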
+#define DCF(name) do { \
+ dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
+ device->debugfs_vol, device, \
+ &device_ ## name ## _fops); \
+ if (IS_ERR_OR_NULL(dentry)) \
+ goto fail; \
+ device->debugfs_vol_ ## name = dentry; \
+ } while (0)
+
+ DCF(oldest_requests);
+ DCF(act_log_extents);
+ DCF(resync_extents);
+ DCF(data_gen_id);
+#undef DCF
+ return;
+
+fail:
+ drbd_debugfs_device_cleanup(device);
+ drbd_err(device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_device_cleanup(struct drbd_device *device)
+{
+ drbd_debugfs_remove(&device->debugfs_minor);
+ drbd_debugfs_remove(&device->debugfs_vol_oldest_requests);
+ drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
+ drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
+ drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+ drbd_debugfs_remove(&device->debugfs_vol);
+}
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device)
+{
+ struct dentry *conn_dir = peer_device->connection->debugfs_conn;
+ struct dentry *dentry;
+ char vnr_buf[8];
+
+ if (!conn_dir)
+ return;
+
+ snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr);
+ dentry = debugfs_create_dir(vnr_buf, conn_dir);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ peer_device->debugfs_peer_dev = dentry;
+ return;
+
+fail:
+ drbd_debugfs_peer_device_cleanup(peer_device);
+ drbd_err(peer_device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device)
+{
+ drbd_debugfs_remove(&peer_device->debugfs_peer_dev);
+}
+
+static int drbd_version_show(struct seq_file *m, void *ignored)
+{
+ seq_printf(m, "# %s\n", drbd_buildtag());
+ seq_printf(m, "VERSION=%s\n", REL_VERSION);
+ seq_printf(m, "API_VERSION=%u\n", API_VERSION);
+ seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
+ seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
+ return 0;
+}
+
+static int drbd_version_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, drbd_version_show, NULL);
+}
+
+static struct file_operations drbd_version_fops = {
+ .owner = THIS_MODULE,
+ .open = drbd_version_open,
+ .llseek = seq_lseek,
+ .read = seq_read,
+ .release = single_release,
+};
+
+/* not __exit, may be indirectly called
+ * from the module-load-failure path as well. */
+void drbd_debugfs_cleanup(void)
+{
+ drbd_debugfs_remove(&drbd_debugfs_resources);
+ drbd_debugfs_remove(&drbd_debugfs_minors);
+ drbd_debugfs_remove(&drbd_debugfs_version);
+ drbd_debugfs_remove(&drbd_debugfs_root);
+}
+
+int __init drbd_debugfs_init(void)
+{
+ struct dentry *dentry;
+
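+ /* hierarchy: <debugfs>/drbd/ with "version", "resources/" and "minors/" below */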
+ dentry = debugfs_create_dir("drbd", NULL);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_root = dentry;
+
+ dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_version = dentry;
+
+ dentry = debugfs_create_dir("resources", drbd_debugfs_root);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_resources = dentry;
+
+ dentry = debugfs_create_dir("minors", drbd_debugfs_root);
+ if (IS_ERR_OR_NULL(dentry))
+ goto fail;
+ drbd_debugfs_minors = dentry;
+ return 0;
+
+fail:
+ drbd_debugfs_cleanup();
+ if (dentry)
+ return PTR_ERR(dentry);
+ else
+ return -EINVAL;
+}
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
new file mode 100644
index 00000000000..8bee21340dc
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -0,0 +1,39 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+
+#include "drbd_int.h"
+
+#ifdef CONFIG_DEBUG_FS
+int __init drbd_debugfs_init(void);
+void drbd_debugfs_cleanup(void);
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource);
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection);
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection);
+
+void drbd_debugfs_device_add(struct drbd_device *device);
+void drbd_debugfs_device_cleanup(struct drbd_device *device);
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device);
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device);
+#else
+
+static inline int __init drbd_debugfs_init(void) { return -ENODEV; }
+static inline void drbd_debugfs_cleanup(void) { }
+
+static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
+static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }
+
+static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { }
+static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { }
+
+static inline void drbd_debugfs_device_add(struct drbd_device *device) { }
+static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { }
+
+static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { }
+static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { }
+
+#endif
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a76ceb344d6..1a000016ccd 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -317,7 +317,63 @@ struct drbd_request {
struct list_head tl_requests; /* ring list in the transfer log */
struct bio *master_bio; /* master bio pointer */
- unsigned long start_time;
+
+ /* see struct drbd_device */
+ struct list_head req_pending_master_completion;
+ struct list_head req_pending_local;
+
+ /* for generic IO accounting */
+ unsigned long start_jif;
+
+ /* for DRBD internal statistics */
+
+ /* Minimal set of time stamps to determine if we wait for activity log
+ * transactions, local disk or peer. 32 bit "jiffies" are good enough,
+ * we don't expect a DRBD request to be stalled for several months.
+ */
+
+ /* before actual request processing */
+ unsigned long in_actlog_jif;
+
+ /* local disk */
+ unsigned long pre_submit_jif;
+
+ /* per connection */
+ unsigned long pre_send_jif;
+ unsigned long acked_jif;
+ unsigned long net_done_jif;
+
+ /* Possibly even more detail to track each phase:
+ * master_completion_jif
+ * how long did it take to complete the master bio
+ * (application visible latency)
+ * allocated_jif
+ * how long the master bio was blocked until we finally allocated
+ * a tracking struct
+ * in_actlog_jif
+ * how long did we wait for activity log transactions
+ *
+ * net_queued_jif
+ * when did we finally queue it for sending
+ * pre_send_jif
+ * when did we start sending it
+ * post_send_jif
+ * how long did we block in the network stack trying to send it
+ * acked_jif
+ * when did we receive (or fake, in protocol A) a remote ACK
+ * net_done_jif
+ * when did we receive final acknowledgement (P_BARRIER_ACK),
+ * or decide, e.g. on connection loss, that we no longer expect
+ * anything from this peer for this request.
+ *
+ * pre_submit_jif
+ * post_sub_jif
+ * when did we start submitting to the lower level device,
+ * and how long did we block in that submit function
+ * local_completion_jif
+ * how long did it take the lower level device to complete this request
+ */
+
/* once it hits 0, we may complete the master_bio */
atomic_t completion_ref;
@@ -366,6 +422,7 @@ struct drbd_peer_request {
struct drbd_interval i;
/* see comments on ee flag bits below */
unsigned long flags;
+ unsigned long submit_jif;
union {
u64 block_id;
struct digest_info *digest;
@@ -408,6 +465,17 @@ enum {
/* Is set when net_conf had two_primaries set while creating this peer_req */
__EE_IN_INTERVAL_TREE,
+
+ /* for debugfs: */
+ /* has this been submitted, or does it still wait for something else? */
+ __EE_SUBMITTED,
+
+ /* this is/was a write request */
+ __EE_WRITE,
+
+ /* this originates from application on peer
+ * (not some resync or verify or other DRBD internal request) */
+ __EE_APPLICATION,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -419,6 +487,9 @@ enum {
#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK)
#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
+#define EE_SUBMITTED (1<<__EE_SUBMITTED)
+#define EE_WRITE (1<<__EE_WRITE)
+#define EE_APPLICATION (1<<__EE_APPLICATION)
/* flag bits per device */
enum {
@@ -433,11 +504,11 @@ enum {
CONSIDER_RESYNC,
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
+
SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
- GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed, returned IO error */
WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */
FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
@@ -450,6 +521,20 @@ enum {
B_RS_H_DONE, /* Before resync handler done (already executed) */
DISCARD_MY_DATA, /* discard_my_data flag per volume */
READ_BALANCE_RR,
+
+ FLUSH_PENDING, /* if set, device->flush_jif is when we submitted that flush
+ * from drbd_flush_after_epoch() */
+
+ /* cleared only after backing device related structures have been destroyed. */
+ GOING_DISKLESS, /* Disk is being detached, because of io-error or admin request. */
+
+ /* to be used in drbd_device_post_work() */
+ GO_DISKLESS, /* tell worker to schedule cleanup before detach */
+ DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */
+ MD_SYNC, /* tell worker to call drbd_md_sync() */
+ RS_START, /* tell worker to start resync/OV */
+ RS_PROGRESS, /* tell worker that resync made significant progress */
+ RS_DONE, /* tell worker that resync is done */
};
struct drbd_bitmap; /* opaque for drbd_device */
@@ -531,6 +616,11 @@ struct drbd_backing_dev {
};
struct drbd_md_io {
+ struct page *page;
+ unsigned long start_jif; /* last call to drbd_md_get_buffer */
+ unsigned long submit_jif; /* last _drbd_md_sync_page_io() submit */
+ const char *current_use;
+ atomic_t in_use;
unsigned int done;
int error;
};
@@ -577,10 +667,18 @@ enum {
* and potentially deadlock on, this drbd worker.
*/
DISCONNECT_SENT,
+
+ DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
};
struct drbd_resource {
char *name;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_res;
+ struct dentry *debugfs_res_volumes;
+ struct dentry *debugfs_res_connections;
+ struct dentry *debugfs_res_in_flight_summary;
+#endif
struct kref kref;
struct idr devices; /* volume number to device mapping */
struct list_head connections;
@@ -594,12 +692,28 @@ struct drbd_resource {
unsigned susp_nod:1; /* IO suspended because no data */
unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
+ enum write_ordering_e write_ordering;
+
cpumask_var_t cpu_mask;
};
+struct drbd_thread_timing_details
+{
+ unsigned long start_jif;
+ void *cb_addr;
+ const char *caller_fn;
+ unsigned int line;
+ unsigned int cb_nr;
+};
+
struct drbd_connection {
struct list_head connections;
struct drbd_resource *resource;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_conn;
+ struct dentry *debugfs_conn_callback_history;
+ struct dentry *debugfs_conn_oldest_requests;
+#endif
struct kref kref;
struct idr peer_devices; /* volume number to peer device mapping */
enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */
@@ -636,7 +750,6 @@ struct drbd_connection {
struct drbd_epoch *current_epoch;
spinlock_t epoch_lock;
unsigned int epochs;
- enum write_ordering_e write_ordering;
atomic_t current_tle_nr; /* transfer log epoch number */
unsigned current_tle_writes; /* writes seen within this tl epoch */
@@ -645,9 +758,22 @@ struct drbd_connection {
struct drbd_thread worker;
struct drbd_thread asender;
+ /* cached pointers,
+ * so we can look up the oldest pending requests more quickly.
+ * protected by resource->req_lock */
+ struct drbd_request *req_next; /* DRBD 9: todo.req_next */
+ struct drbd_request *req_ack_pending;
+ struct drbd_request *req_not_net_done;
+
/* sender side */
struct drbd_work_queue sender_work;
+#define DRBD_THREAD_DETAILS_HIST 16
+ unsigned int w_cb_nr; /* keeps counting up */
+ unsigned int r_cb_nr; /* keeps counting up */
+ struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST];
+ struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
+
struct {
/* whether this sender thread
* has processed a single write yet. */
@@ -663,11 +789,22 @@ struct drbd_connection {
} send;
};
+void __update_timing_details(
+ struct drbd_thread_timing_details *tdp,
+ unsigned int *cb_nr,
+ void *cb,
+ const char *fn, const unsigned int line);
+
+#define update_worker_timing_details(c, cb) \
+ __update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ )
+#define update_receiver_timing_details(c, cb) \
+ __update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ )
+
struct submit_worker {
struct workqueue_struct *wq;
struct work_struct worker;
- spinlock_t lock;
+ /* protected by ..->resource->req_lock */
struct list_head writes;
};
@@ -675,12 +812,29 @@ struct drbd_peer_device {
struct list_head peer_devices;
struct drbd_device *device;
struct drbd_connection *connection;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_peer_dev;
+#endif
};
struct drbd_device {
struct drbd_resource *resource;
struct list_head peer_devices;
- int vnr; /* volume number within the connection */
+ struct list_head pending_bitmap_io;
+
+ unsigned long flush_jif;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_minor;
+ struct dentry *debugfs_vol;
+ struct dentry *debugfs_vol_oldest_requests;
+ struct dentry *debugfs_vol_act_log_extents;
+ struct dentry *debugfs_vol_resync_extents;
+ struct dentry *debugfs_vol_data_gen_id;
+#endif
+
+ unsigned int vnr; /* volume number within the connection */
+ unsigned int minor; /* device minor number */
+
struct kref kref;
/* things that are stored as / read from meta data on disk */
@@ -697,19 +851,10 @@ struct drbd_device {
unsigned long last_reattach_jif;
struct drbd_work resync_work;
struct drbd_work unplug_work;
- struct drbd_work go_diskless;
- struct drbd_work md_sync_work;
- struct drbd_work start_resync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
struct timer_list start_resync_timer;
struct timer_list request_timer;
-#ifdef DRBD_DEBUG_MD_SYNC
- struct {
- unsigned int line;
- const char* func;
- } last_md_mark_dirty;
-#endif
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@@ -724,6 +869,7 @@ struct drbd_device {
unsigned int al_writ_cnt;
unsigned int bm_writ_cnt;
atomic_t ap_bio_cnt; /* Requests we need to complete */
+ atomic_t ap_actlog_cnt; /* Requests waiting for activity log */
atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
atomic_t unacked_cnt; /* Need to send replies for */
@@ -733,6 +879,13 @@ struct drbd_device {
struct rb_root read_requests;
struct rb_root write_requests;
+ /* for statistics and timeouts */
+ /* [0] read, [1] write */
+ struct list_head pending_master_completion[2];
+ struct list_head pending_completion[2];
+
+ /* use checksums for *this* resync */
+ bool use_csums;
/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
unsigned long rs_total;
/* number of resync blocks that failed in this run */
@@ -788,9 +941,7 @@ struct drbd_device {
atomic_t pp_in_use; /* allocated from page pool */
atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
wait_queue_head_t ee_wait;
- struct page *md_io_page; /* one page buffer for md_io */
struct drbd_md_io md_io;
- atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */
spinlock_t al_lock;
wait_queue_head_t al_wait;
struct lru_cache *act_log; /* activity log */
@@ -800,7 +951,6 @@ struct drbd_device {
atomic_t packet_seq;
unsigned int peer_seq;
spinlock_t peer_seq_lock;
- unsigned int minor;
unsigned long comm_bm_set; /* communicated number of set bits. */
struct bm_io_work bm_io_work;
u64 ed_uuid; /* UUID of the exposed data */
@@ -824,6 +974,21 @@ struct drbd_device {
struct submit_worker submit;
};
+struct drbd_bm_aio_ctx {
+ struct drbd_device *device;
+ struct list_head list; /* on device->pending_bitmap_io */
+ unsigned long start_jif;
+ atomic_t in_flight;
+ unsigned int done;
+ unsigned flags;
+#define BM_AIO_COPY_PAGES 1
+#define BM_AIO_WRITE_HINTED 2
+#define BM_AIO_WRITE_ALL_PAGES 4
+#define BM_AIO_READ 8
+ int error;
+ struct kref kref;
+};
+
struct drbd_config_context {
/* assigned from drbd_genlmsghdr */
unsigned int minor;
@@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
extern int drbd_send_bitmap(struct drbd_device *device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
void drbd_print_uuids(struct drbd_device *device, const char *text);
@@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must
extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
-#ifndef DRBD_DEBUG_MD_SYNC
extern void drbd_md_mark_dirty(struct drbd_device *device);
-#else
-#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
-extern void drbd_md_mark_dirty_(struct drbd_device *device,
- unsigned int line, const char *func);
-#endif
extern void drbd_queue_bitmap_io(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
void (*done)(struct drbd_device *, int),
@@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device,
extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
char *why, enum bm_flag flags);
-extern int drbd_bmio_set_n_write(struct drbd_device *device);
-extern int drbd_bmio_clear_n_write(struct drbd_device *device);
-extern void drbd_ldev_destroy(struct drbd_device *device);
+extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
+extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
/* Meta data layout
*
@@ -1105,17 +1263,21 @@ struct bm_extent {
/* in which _bitmap_ extent (resp. sector) the bit for a certain
* _storage_ sector is located in */
#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
+#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
-/* how much _storage_ sectors we have per bitmap sector */
+/* first storage sector a bitmap extent corresponds to */
#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
+/* how much _storage_ sectors we have per bitmap extent */
#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
+/* how many bits are covered by one bitmap extent (resync extent) */
+#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
+
+#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1)
+
/* in one sector of the bitmap, we have this many activity_log extents. */
#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
-#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
-#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
-
/* the extent in "PER_EXTENT" below is an activity log extent
* we need that many (long words/bytes) to store the bitmap
* of one AL_EXTENT_SIZE chunk of storage.
@@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
const unsigned long s, const unsigned long e);
extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
-extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local);
extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
+extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
extern size_t drbd_bm_words(struct drbd_device *device);
@@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon
extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
-extern int drbd_bm_rs_done(struct drbd_device *device);
/* for receive_bitmap */
extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
size_t number, unsigned long *buffer);
@@ -1312,7 +1473,7 @@ enum determine_dev_size {
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_max_bio_size(struct drbd_device *device);
+extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
enum drbd_role new_role,
int force);
@@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device);
extern void suspend_other_sg(struct drbd_device *device);
extern int drbd_resync_finished(struct drbd_device *device);
/* maybe rather drbd_main.c ? */
-extern void *drbd_md_get_buffer(struct drbd_device *device);
+extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
extern void drbd_md_put_buffer(struct drbd_device *device);
extern int drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev, sector_t sector, int rw);
@@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+ bool throttle_if_app_is_waiting);
extern int drbd_submit_peer_request(struct drbd_device *,
struct drbd_peer_request *, const unsigned,
const int);
@@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
{
__release(local);
if (!bio->bi_bdev) {
- printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
- "bio->bi_bdev == NULL\n",
- device_to_minor(device));
- dump_stack();
+ drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
bio_endio(bio, -ENODEV);
return;
}
@@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
generic_make_request(bio);
}
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+ enum write_ordering_e wo);
/* drbd_proc.c */
extern struct proc_dir_entry *drbd_proc;
@@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s);
/* drbd_actlog.c */
extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
+extern void drbd_al_begin_io_commit(struct drbd_device *device);
extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate);
+extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
@@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device);
extern void drbd_rs_failed_io(struct drbd_device *device,
sector_t sector, int size);
extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
-extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector,
- int size, const char *file, const unsigned int line);
+
+enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
+extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+ enum update_sync_bits_mode mode,
+ const char *file, const unsigned int line);
#define drbd_set_in_sync(device, sector, size) \
- __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__)
-extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
- int size, const char *file, const unsigned int line);
+ __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__)
#define drbd_set_out_of_sync(device, sector, size) \
- __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__)
+ __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__)
+#define drbd_rs_failed_io(device, sector, size) \
+ __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__)
extern void drbd_al_shrink(struct drbd_device *device);
extern int drbd_initialize_al(struct drbd_device *, void *);
@@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
}
static inline void
-drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
{
unsigned long flags;
spin_lock_irqsave(&q->q_lock, flags);
- list_add(&w->list, &q->q);
+ list_add_tail(&w->list, &q->q);
spin_unlock_irqrestore(&q->q_lock, flags);
wake_up(&q->q_wait);
}
static inline void
-drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w)
{
unsigned long flags;
spin_lock_irqsave(&q->q_lock, flags);
- list_add_tail(&w->list, &q->q);
+ if (list_empty_careful(&w->list))
+ list_add_tail(&w->list, &q->q);
spin_unlock_irqrestore(&q->q_lock, flags);
wake_up(&q->q_wait);
}
+static inline void
+drbd_device_post_work(struct drbd_device *device, int work_bit)
+{
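+ /* set the per-device work bit; notify the connection worker only if there is new work */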
+ if (!test_and_set_bit(work_bit, &device->flags)) {
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
+ struct drbd_work_queue *q = &connection->sender_work;
+ if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
+ wake_up(&q->q_wait);
+ }
+}
+
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
static inline void wake_asender(struct drbd_connection *connection)
@@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device)
func, line, \
atomic_read(&device->which))
-#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__)
+#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
{
if (atomic_dec_and_test(&device->ap_pending_cnt))
@@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device)
atomic_inc(&device->rs_pending_cnt);
}
-#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__)
+#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
{
atomic_dec(&device->rs_pending_cnt);
@@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device)
atomic_inc(&device->unacked_cnt);
}
-#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__)
+#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
{
atomic_dec(&device->unacked_cnt);
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
}
-#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__)
+#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
{
atomic_sub(n, &device->unacked_cnt);
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
}
+static inline bool is_sync_state(enum drbd_conns connection_state)
+{
+ return
+ (connection_state == C_SYNC_SOURCE
+ || connection_state == C_SYNC_TARGET
+ || connection_state == C_PAUSED_SYNC_S
+ || connection_state == C_PAUSED_SYNC_T);
+}
+
/**
* get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
* @M: DRBD device.
@@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
static inline void put_ldev(struct drbd_device *device)
{
+ enum drbd_disk_state ds = device->state.disk;
+ /* We must check the state *before* the atomic_dec becomes visible,
+ * or we have a theoretical race where someone hitting zero,
+ * while state still D_FAILED, will then see D_DISKLESS in the
+ * condition below and call into destroy, where it must not, yet. */
int i = atomic_dec_return(&device->local_cnt);
/* This may be called from some endio handler,
@@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device)
__release(local);
D_ASSERT(device, i >= 0);
if (i == 0) {
- if (device->state.disk == D_DISKLESS)
+ if (ds == D_DISKLESS)
/* even internal references gone, safe to destroy */
- drbd_ldev_destroy(device);
- if (device->state.disk == D_FAILED) {
+ drbd_device_post_work(device, DESTROY_DISK);
+ if (ds == D_FAILED)
/* all application IO references gone. */
- if (!test_and_set_bit(GO_DISKLESS, &device->flags))
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->go_diskless);
- }
+ if (!test_and_set_bit(GOING_DISKLESS, &device->flags))
+ drbd_device_post_work(device, GO_DISKLESS);
wake_up(&device->misc_wait);
}
}
@@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_
extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
#endif
-/* you must have an "get_ldev" reference */
-static inline void drbd_get_syncer_progress(struct drbd_device *device,
- unsigned long *bits_left, unsigned int *per_mil_done)
-{
- /* this is to break it at compile time when we change that, in case we
- * want to support more than (1<<32) bits on a 32bit arch. */
- typecheck(unsigned long, device->rs_total);
-
- /* note: both rs_total and rs_left are in bits, i.e. in
- * units of BM_BLOCK_SIZE.
- * for the percentage, we don't care. */
-
- if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
- *bits_left = device->ov_left;
- else
- *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
- /* >> 10 to prevent overflow,
- * +1 to prevent division by zero */
- if (*bits_left > device->rs_total) {
- /* doh. maybe a logic bug somewhere.
- * may also be just a race condition
- * between this and a disconnect during sync.
- * for now, just prevent in-kernel buffer overflow.
- */
- smp_rmb();
- drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
- drbd_conn_str(device->state.conn),
- *bits_left, device->rs_total, device->rs_failed);
- *per_mil_done = 0;
- } else {
- /* Make sure the division happens in long context.
- * We allow up to one petabyte storage right now,
- * at a granularity of 4k per bit that is 2**38 bits.
- * After shift right and multiplication by 1000,
- * this should still fit easily into a 32bit long,
- * so we don't need a 64bit division on 32bit arch.
- * Note: currently we don't support such large bitmaps on 32bit
- * arch anyways, but no harm done to be prepared for it here.
- */
- unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
- unsigned long left = *bits_left >> shift;
- unsigned long total = 1UL + (device->rs_total >> shift);
- unsigned long tmp = 1000UL - left * 1000UL/total;
- *per_mil_done = tmp;
- }
-}
-
-
/* this throttles on-the-fly application requests
* according to max_buffers settings;
* maybe re-implement using semaphores? */
@@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device)
return QUEUE_ORDERED_NONE;
}
-static inline void drbd_md_flush(struct drbd_device *device)
-{
- int r;
-
- if (device->ldev == NULL) {
- drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n");
- return;
- }
-
- if (test_bit(MD_NO_FUA, &device->flags))
- return;
-
- r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL);
- if (r) {
- set_bit(MD_NO_FUA, &device->flags);
- drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r);
- }
-}
-
static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
{
return list_first_entry_or_null(&resource->connections,
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f38fcb00c10..f210543f05f 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
unsigned int size; /* size in bytes */
sector_t end; /* highest interval end in subtree */
int local:1 /* local or remote request? */;
- int waiting:1;
+ int waiting:1; /* someone is waiting for this to complete */
+ int completed:1; /* this has been completed already;
+ * ignore for conflict detection */
};
static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 960645c26e6..9b465bb6848 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -26,7 +26,10 @@
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
+#include <linux/jiffies.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
@@ -54,16 +57,14 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
-
#include "drbd_vli.h"
+#include "drbd_debugfs.h"
static DEFINE_MUTEX(drbd_main_mutex);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static void drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_work *w, int unused);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
@@ -264,7 +265,7 @@ bail:
/**
* _tl_restart() - Walks the transfer log, and applies an action to all requests
- * @device: DRBD device.
+ * @connection: DRBD connection to operate on.
* @what: The action/event to perform with all request objects
*
* @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
@@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr,
msg_flags);
if (data && !err)
err = drbd_send_all(connection, sock->socket, data, size, 0);
+ /* DRBD protocol "pings" are latency critical.
+ * This is supposed to trigger tcp_push_pending_frames() */
+ if (!err && (cmd == P_PING || cmd == P_PING_ACK))
+ drbd_tcp_nodelay(sock->socket);
+
return err;
}
@@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
if (peer_device->connection->agreed_pro_version >= 100) {
if (req->rq_state & RQ_EXP_RECEIVE_ACK)
dp_flags |= DP_SEND_RECEIVE_ACK;
- if (req->rq_state & RQ_EXP_WRITE_ACK)
+ /* During resync, request an explicit write ack,
+ * even in protocol != C */
+ if (req->rq_state & RQ_EXP_WRITE_ACK
+ || (dp_flags & DP_MAY_SET_IN_SYNC))
dp_flags |= DP_SEND_WRITE_ACK;
}
p->dp_flags = cpu_to_be32(dp_flags);
@@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
drbd_set_defaults(device);
atomic_set(&device->ap_bio_cnt, 0);
+ atomic_set(&device->ap_actlog_cnt, 0);
atomic_set(&device->ap_pending_cnt, 0);
atomic_set(&device->rs_pending_cnt, 0);
atomic_set(&device->unacked_cnt, 0);
@@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
atomic_set(&device->rs_sect_in, 0);
atomic_set(&device->rs_sect_ev, 0);
atomic_set(&device->ap_in_flight, 0);
- atomic_set(&device->md_io_in_use, 0);
+ atomic_set(&device->md_io.in_use, 0);
mutex_init(&device->own_state_mutex);
device->state_mutex = &device->own_state_mutex;
@@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device)
INIT_LIST_HEAD(&device->resync_reads);
INIT_LIST_HEAD(&device->resync_work.list);
INIT_LIST_HEAD(&device->unplug_work.list);
- INIT_LIST_HEAD(&device->go_diskless.list);
- INIT_LIST_HEAD(&device->md_sync_work.list);
- INIT_LIST_HEAD(&device->start_resync_work.list);
INIT_LIST_HEAD(&device->bm_io_work.w.list);
+ INIT_LIST_HEAD(&device->pending_master_completion[0]);
+ INIT_LIST_HEAD(&device->pending_master_completion[1]);
+ INIT_LIST_HEAD(&device->pending_completion[0]);
+ INIT_LIST_HEAD(&device->pending_completion[1]);
device->resync_work.cb = w_resync_timer;
device->unplug_work.cb = w_send_write_hint;
- device->go_diskless.cb = w_go_diskless;
- device->md_sync_work.cb = w_md_sync;
device->bm_io_work.w.cb = w_bitmap_io;
- device->start_resync_work.cb = w_start_resync;
init_timer(&device->resync_timer);
init_timer(&device->md_sync_timer);
@@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device)
drbd_bm_cleanup(device);
}
- drbd_free_bc(device->ldev);
+ drbd_free_ldev(device->ldev);
device->ldev = NULL;
clear_bit(AL_SUSPENDED, &device->flags);
@@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device)
D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
D_ASSERT(device, list_empty(&device->resync_work.list));
D_ASSERT(device, list_empty(&device->unplug_work.list));
- D_ASSERT(device, list_empty(&device->go_diskless.list));
drbd_set_defaults(device);
}
@@ -2129,20 +2136,6 @@ Enomem:
return -ENOMEM;
}
-static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
- void *unused)
-{
- /* just so we have it. you never know what interesting things we
- * might want to do here some day...
- */
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block drbd_notifier = {
- .notifier_call = drbd_notify_sys,
-};
-
static void drbd_release_all_peer_reqs(struct drbd_device *device)
{
int rr;
@@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref)
{
struct drbd_device *device = container_of(kref, struct drbd_device, kref);
struct drbd_resource *resource = device->resource;
- struct drbd_connection *connection;
+ struct drbd_peer_device *peer_device, *tmp_peer_device;
del_timer_sync(&device->request_timer);
@@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref)
if (device->this_bdev)
bdput(device->this_bdev);
- drbd_free_bc(device->ldev);
+ drbd_free_ldev(device->ldev);
device->ldev = NULL;
drbd_release_all_peer_reqs(device);
@@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref)
if (device->bitmap) /* should no longer be there. */
drbd_bm_cleanup(device);
- __free_page(device->md_io_page);
+ __free_page(device->md_io.page);
put_disk(device->vdisk);
blk_cleanup_queue(device->rq_queue);
kfree(device->rs_plan_s);
- kfree(first_peer_device(device));
- kfree(device);
- for_each_connection(connection, resource)
- kref_put(&connection->kref, drbd_destroy_connection);
+ /* not for_each_connection(connection, resource):
+ * those may have been cleaned up and disassociated already.
+ */
+ for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
+ kref_put(&peer_device->connection->kref, drbd_destroy_connection);
+ kfree(peer_device);
+ }
+ memset(device, 0xfd, sizeof(*device));
+ kfree(device);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws)
list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
struct drbd_device *device = req->device;
struct bio *bio = req->master_bio;
- unsigned long start_time = req->start_time;
+ unsigned long start_jif = req->start_jif;
bool expected;
expected =
@@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws)
/* We are not just doing generic_make_request(),
* as we want to keep the start_time information. */
inc_ap_bio(device);
- __drbd_make_request(device, bio, start_time);
+ __drbd_make_request(device, bio, start_jif);
}
}
+/* called via drbd_req_put_completion_ref(),
+ * holds resource->req_lock */
void drbd_restart_request(struct drbd_request *req)
{
unsigned long flags;
@@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref)
idr_destroy(&resource->devices);
free_cpumask_var(resource->cpu_mask);
kfree(resource->name);
+ memset(resource, 0xf2, sizeof(*resource));
kfree(resource);
}
@@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource)
for_each_connection_safe(connection, tmp, resource) {
list_del(&connection->connections);
+ drbd_debugfs_connection_cleanup(connection);
kref_put(&connection->kref, drbd_destroy_connection);
}
+ drbd_debugfs_resource_cleanup(resource);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2318,8 +2321,6 @@ static void drbd_cleanup(void)
struct drbd_device *device;
struct drbd_resource *resource, *tmp;
- unregister_reboot_notifier(&drbd_notifier);
-
/* first remove proc,
* drbdsetup uses it's presence to detect
* whether DRBD is loaded.
@@ -2335,6 +2336,7 @@ static void drbd_cleanup(void)
destroy_workqueue(retry.wq);
drbd_genl_unregister();
+ drbd_debugfs_cleanup();
idr_for_each_entry(&drbd_devices, device, i)
drbd_delete_device(device);
@@ -2350,7 +2352,7 @@ static void drbd_cleanup(void)
idr_destroy(&drbd_devices);
- printk(KERN_INFO "drbd: module cleanup done.\n");
+ pr_info("module cleanup done.\n");
}
/**
@@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE,
cpumask_bits(new_cpu_mask), nr_cpu_ids);
+ if (err == -EOVERFLOW) {
+ /* So what. mask it out. */
+ cpumask_var_t tmp_cpu_mask;
+ if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) {
+ cpumask_setall(tmp_cpu_mask);
+ cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask);
+ drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n",
+ res_opts->cpu_mask,
+ strlen(res_opts->cpu_mask) > 12 ? "..." : "",
+ nr_cpu_ids);
+ free_cpumask_var(tmp_cpu_mask);
+ err = 0;
+ }
+ }
if (err) {
drbd_warn(resource, "bitmap_parse() failed with %d\n", err);
/* retcode = ERR_CPU_MASK_PARSE; */
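
With this change, a CPU mask that names bits beyond nr_cpu_ids no longer fails the whole operation; the parsed result is masked down to the bits that actually exist and a warning is logged. The same "truncate instead of reject" idea, sketched in user-space C with a plain 64-bit word standing in for cpumask_var_t (the 64-bit limit and the names are assumptions of this sketch):

#include <stdint.h>
#include <stdio.h>

/* pretend the machine has only this many "CPUs" */
#define NR_CPU_IDS 12

/* keep only the low nr bits of a parsed mask; report whether anything was dropped */
static int truncate_mask(uint64_t *mask, unsigned int nr)
{
	uint64_t allowed = (nr >= 64) ? ~UINT64_C(0) : ((UINT64_C(1) << nr) - 1);
	int overflowed = (*mask & ~allowed) != 0;

	*mask &= allowed;
	return overflowed;
}

int main(void)
{
	uint64_t parsed = 0xf00f;	/* bits 12..15 do not exist on this "machine" */

	if (truncate_mask(&parsed, NR_CPU_IDS))
		fprintf(stderr, "warning: mask truncated to %u bits\n", NR_CPU_IDS);
	printf("effective mask: 0x%llx\n", (unsigned long long)parsed);
	return 0;
}
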
@@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name)
kref_init(&resource->kref);
idr_init(&resource->devices);
INIT_LIST_HEAD(&resource->connections);
+ resource->write_ordering = WO_bdev_flush;
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex);
spin_lock_init(&resource->req_lock);
+ drbd_debugfs_resource_add(resource);
return resource;
fail_free_name:
@@ -2593,7 +2611,7 @@ fail:
return NULL;
}
-/* caller must be under genl_lock() */
+/* caller must be under adm_mutex */
struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
{
struct drbd_resource *resource;
@@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
INIT_LIST_HEAD(&connection->current_epoch->list);
connection->epochs = 1;
spin_lock_init(&connection->epoch_lock);
- connection->write_ordering = WO_bdev_flush;
connection->send.seen_any_write_yet = false;
connection->send.current_epoch_nr = 0;
@@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
kref_get(&resource->kref);
list_add_tail_rcu(&connection->connections, &resource->connections);
+ drbd_debugfs_connection_add(connection);
return connection;
fail_resource:
@@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref)
drbd_free_socket(&connection->data);
kfree(connection->int_dig_in);
kfree(connection->int_dig_vv);
+ memset(connection, 0xfc, sizeof(*connection));
kfree(connection);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device)
return -ENOMEM;
INIT_WORK(&device->submit.worker, do_submit);
- spin_lock_init(&device->submit.lock);
INIT_LIST_HEAD(&device->submit.writes);
return 0;
}
@@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
blk_queue_merge_bvec(q, drbd_merge_bvec);
q->queue_lock = &resource->req_lock;
- device->md_io_page = alloc_page(GFP_KERNEL);
- if (!device->md_io_page)
+ device->md_io.page = alloc_page(GFP_KERNEL);
+ if (!device->md_io.page)
goto out_no_io_page;
if (drbd_bm_init(device))
@@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
kref_get(&device->kref);
INIT_LIST_HEAD(&device->peer_devices);
+ INIT_LIST_HEAD(&device->pending_bitmap_io);
for_each_connection(connection, resource) {
peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL);
if (!peer_device)
@@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
for_each_peer_device(peer_device, device)
drbd_connected(peer_device);
}
-
+ /* move to create_peer_device() */
+ for_each_peer_device(peer_device, device)
+ drbd_debugfs_peer_device_add(peer_device);
+ drbd_debugfs_device_add(device);
return NO_ERROR;
out_idr_remove_vol:
@@ -2853,7 +2875,7 @@ out_idr_remove_minor:
out_no_minor_idr:
drbd_bm_cleanup(device);
out_no_bitmap:
- __free_page(device->md_io_page);
+ __free_page(device->md_io.page);
out_no_io_page:
put_disk(disk);
out_no_disk:
@@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device)
{
struct drbd_resource *resource = device->resource;
struct drbd_connection *connection;
+ struct drbd_peer_device *peer_device;
int refs = 3;
+ /* move to free_peer_device() */
+ for_each_peer_device(peer_device, device)
+ drbd_debugfs_peer_device_cleanup(peer_device);
+ drbd_debugfs_device_cleanup(device);
for_each_connection(connection, resource) {
idr_remove(&connection->peer_devices, device->vnr);
refs++;
@@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device)
kref_sub(&device->kref, refs, drbd_destroy_device);
}
-int __init drbd_init(void)
+static int __init drbd_init(void)
{
int err;
if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
- printk(KERN_ERR
- "drbd: invalid minor_count (%d)\n", minor_count);
+ pr_err("invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
return -EINVAL;
#else
@@ -2897,14 +2923,11 @@ int __init drbd_init(void)
err = register_blkdev(DRBD_MAJOR, "drbd");
if (err) {
- printk(KERN_ERR
- "drbd: unable to register block device major %d\n",
+ pr_err("unable to register block device major %d\n",
DRBD_MAJOR);
return err;
}
- register_reboot_notifier(&drbd_notifier);
-
/*
* allocate all necessary structs
*/
@@ -2918,7 +2941,7 @@ int __init drbd_init(void)
err = drbd_genl_register();
if (err) {
- printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+ pr_err("unable to register generic netlink family\n");
goto fail;
}
@@ -2929,38 +2952,39 @@ int __init drbd_init(void)
err = -ENOMEM;
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
if (!drbd_proc) {
- printk(KERN_ERR "drbd: unable to register proc file\n");
+ pr_err("unable to register proc file\n");
goto fail;
}
retry.wq = create_singlethread_workqueue("drbd-reissue");
if (!retry.wq) {
- printk(KERN_ERR "drbd: unable to create retry workqueue\n");
+ pr_err("unable to create retry workqueue\n");
goto fail;
}
INIT_WORK(&retry.worker, do_retry);
spin_lock_init(&retry.lock);
INIT_LIST_HEAD(&retry.writes);
- printk(KERN_INFO "drbd: initialized. "
+ if (drbd_debugfs_init())
+ pr_notice("failed to initialize debugfs -- will not be available\n");
+
+ pr_info("initialized. "
"Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
- printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
- printk(KERN_INFO "drbd: registered as block device major %d\n",
- DRBD_MAJOR);
-
+ pr_info("%s\n", drbd_buildtag());
+ pr_info("registered as block device major %d\n", DRBD_MAJOR);
return 0; /* Success! */
fail:
drbd_cleanup();
if (err == -ENOMEM)
- printk(KERN_ERR "drbd: ran out of memory\n");
+ pr_err("ran out of memory\n");
else
- printk(KERN_ERR "drbd: initialization failure\n");
+ pr_err("initialization failure\n");
return err;
}
-void drbd_free_bc(struct drbd_backing_dev *ldev)
+void drbd_free_ldev(struct drbd_backing_dev *ldev)
{
if (ldev == NULL)
return;
@@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
kfree(ldev);
}
-void drbd_free_sock(struct drbd_connection *connection)
+static void drbd_free_one_sock(struct drbd_socket *ds)
{
- if (connection->data.socket) {
- mutex_lock(&connection->data.mutex);
- kernel_sock_shutdown(connection->data.socket, SHUT_RDWR);
- sock_release(connection->data.socket);
- connection->data.socket = NULL;
- mutex_unlock(&connection->data.mutex);
- }
- if (connection->meta.socket) {
- mutex_lock(&connection->meta.mutex);
- kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR);
- sock_release(connection->meta.socket);
- connection->meta.socket = NULL;
- mutex_unlock(&connection->meta.mutex);
+ struct socket *s;
+ mutex_lock(&ds->mutex);
+ s = ds->socket;
+ ds->socket = NULL;
+ mutex_unlock(&ds->mutex);
+ if (s) {
+ /* so debugfs does not need to mutex_lock() */
+ synchronize_rcu();
+ kernel_sock_shutdown(s, SHUT_RDWR);
+ sock_release(s);
}
}
+void drbd_free_sock(struct drbd_connection *connection)
+{
+ if (connection->data.socket)
+ drbd_free_one_sock(&connection->data);
+ if (connection->meta.socket)
+ drbd_free_one_sock(&connection->meta);
+}
+
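
drbd_free_one_sock() holds the mutex only long enough to steal the socket pointer and clear it; the potentially slow shutdown and release happen outside the lock, with synchronize_rcu() in between so the debugfs readers that dereference the pointer without taking the mutex can drain first. The "detach under the lock, destroy outside it" half of that pattern in standalone C with a pthread mutex (the RCU grace period is only indicated by a comment here):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct sock_slot {
	pthread_mutex_t lock;
	int *socket;		/* stand-in for struct socket * */
};

static void free_one_sock(struct sock_slot *slot)
{
	int *s;

	pthread_mutex_lock(&slot->lock);
	s = slot->socket;	/* steal the pointer ... */
	slot->socket = NULL;	/* ... and unpublish it */
	pthread_mutex_unlock(&slot->lock);

	if (s) {
		/* the kernel code does synchronize_rcu() here, so lockless
		 * readers that already loaded the old pointer finish first */
		printf("shutting down socket %d outside the lock\n", *s);
		free(s);
	}
}

int main(void)
{
	struct sock_slot slot = { PTHREAD_MUTEX_INITIALIZER, NULL };

	slot.socket = malloc(sizeof(int));
	*slot.socket = 7;
	free_one_sock(&slot);
	free_one_sock(&slot);	/* idempotent: nothing left to free */
	return 0;
}
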
/* meta data management */
void conn_md_sync(struct drbd_connection *connection)
@@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device)
if (!get_ldev_if_state(device, D_FAILED))
return;
- buffer = drbd_md_get_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer)
goto out;
@@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
if (device->state.disk != D_DISKLESS)
return ERR_DISK_CONFIGURED;
- buffer = drbd_md_get_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__);
if (!buffer)
return ERR_NOMEM;
@@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
*
* Sets all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_set_n_write(struct drbd_device *device)
+int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
{
int rv = -EIO;
- if (get_ldev_if_state(device, D_ATTACHING)) {
- drbd_md_set_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- drbd_bm_set_all(device);
-
- rv = drbd_bm_write(device);
+ drbd_md_set_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
+ drbd_bm_set_all(device);
- if (!rv) {
- drbd_md_clear_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- }
+ rv = drbd_bm_write(device);
- put_ldev(device);
+ if (!rv) {
+ drbd_md_clear_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
}
return rv;
@@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device)
*
* Clears all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_clear_n_write(struct drbd_device *device)
+int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
{
- int rv = -EIO;
-
drbd_resume_al(device);
- if (get_ldev_if_state(device, D_ATTACHING)) {
- drbd_bm_clear_all(device);
- rv = drbd_bm_write(device);
- put_ldev(device);
- }
-
- return rv;
+ drbd_bm_clear_all(device);
+ return drbd_bm_write(device);
}
static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
return 0;
}
-void drbd_ldev_destroy(struct drbd_device *device)
-{
- lc_destroy(device->resync);
- device->resync = NULL;
- lc_destroy(device->act_log);
- device->act_log = NULL;
- __no_warn(local,
- drbd_free_bc(device->ldev);
- device->ldev = NULL;);
-
- clear_bit(GO_DISKLESS, &device->flags);
-}
-
-static int w_go_diskless(struct drbd_work *w, int unused)
-{
- struct drbd_device *device =
- container_of(w, struct drbd_device, go_diskless);
-
- D_ASSERT(device, device->state.disk == D_FAILED);
- /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
- * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
- * the protected members anymore, though, so once put_ldev reaches zero
- * again, it will be safe to free them. */
-
- /* Try to write changed bitmap pages, read errors may have just
- * set some bits outside the area covered by the activity log.
- *
- * If we have an IO error during the bitmap writeout,
- * we will want a full sync next time, just in case.
- * (Do we want a specific meta data flag for this?)
- *
- * If that does not make it to stable storage either,
- * we cannot do anything about that anymore.
- *
- * We still need to check if both bitmap and ldev are present, we may
- * end up here after a failed attach, before ldev was even assigned.
- */
- if (device->bitmap && device->ldev) {
- /* An interrupted resync or similar is allowed to recounts bits
- * while we detach.
- * Any modifications would not be expected anymore, though.
- */
- if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
- "detach", BM_LOCKED_TEST_ALLOWED)) {
- if (test_bit(WAS_READ_ERROR, &device->flags)) {
- drbd_md_set_flag(device, MDF_FULL_SYNC);
- drbd_md_sync(device);
- }
- }
- }
-
- drbd_force_state(device, NS(disk, D_DISKLESS));
- return 0;
-}
-
/**
* drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
* @device: DRBD device.
@@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused)
* that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
* called from worker context. It MUST NOT be used while a previous such
* work is still pending!
+ *
+ * Its worker function wraps the call to io_fn() in get_ldev() and
+ * put_ldev().
*/
void drbd_queue_bitmap_io(struct drbd_device *device,
int (*io_fn)(struct drbd_device *),
@@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
static void md_sync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
-
- /* must not double-queue! */
- if (list_empty(&device->md_sync_work.list))
- drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
- &device->md_sync_work);
-}
-
-static int w_md_sync(struct drbd_work *w, int unused)
-{
- struct drbd_device *device =
- container_of(w, struct drbd_device, md_sync_work);
-
- drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
-#ifdef DEBUG
- drbd_warn(device, "last md_mark_dirty: %s:%u\n",
- device->last_md_mark_dirty.func, device->last_md_mark_dirty.line);
-#endif
- drbd_md_sync(device);
- return 0;
+ drbd_device_post_work(device, MD_SYNC);
}
const char *cmdname(enum drbd_packet cmd)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3f2e1673808..1cd47df44bd 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -23,6 +23,8 @@
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
@@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
if (genlmsg_reply(skb, info))
- printk(KERN_ERR "drbd: error sending genl reply\n");
+ pr_err("error sending genl reply\n");
}
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
@@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection)
}
enum drbd_state_rv
-drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
+drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
const int max_tries = 4;
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
struct net_conf *nc;
@@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
- if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
+ if (conn_try_outdate_peer(connection)) {
val.disk = D_UP_TO_DATE;
mask.disk = D_MASK;
}
@@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
if (rv == SS_NOTHING_TO_DO)
goto out;
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
- if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
+ if (!conn_try_outdate_peer(connection) && force) {
drbd_warn(device, "Forced into split brain situation!\n");
mask.pdsk = D_MASK;
val.pdsk = D_OUTDATED;
@@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
retry at most once more in this case. */
int timeo;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
@@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
/* FIXME also wait for all pending P_BARRIER_ACK? */
if (new_role == R_SECONDARY) {
- set_disk_ro(device->vdisk, true);
if (get_ldev(device)) {
device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
put_ldev(device);
}
} else {
- /* Called from drbd_adm_set_role only.
- * We are still holding the conf_update mutex. */
- nc = first_peer_device(device)->connection->net_conf;
+ mutex_lock(&device->resource->conf_update);
+ nc = connection->net_conf;
if (nc)
nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+ mutex_unlock(&device->resource->conf_update);
- set_disk_ro(device->vdisk, false);
if (get_ldev(device)) {
if (((device->state.conn < C_CONNECTED ||
device->state.pdsk <= D_FAILED)
@@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
if (device->state.conn >= C_WF_REPORT_PARAMS) {
/* if this was forced, we should consider sync */
if (forced)
- drbd_send_uuids(first_peer_device(device));
- drbd_send_current_state(first_peer_device(device));
+ drbd_send_uuids(peer_device);
+ drbd_send_current_state(peer_device);
}
drbd_md_sync(device);
-
+ set_disk_ro(device->vdisk, new_role == R_SECONDARY);
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
mutex_unlock(device->state_mutex);
@@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
* still lock the act_log to not trigger ASSERTs there.
*/
drbd_suspend_io(device);
- buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
+ buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
drbd_resume_io(device);
return DS_ERROR;
@@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
if (la_size_changed || md_moved || rs) {
u32 prev_flags;
+ /* We do some synchronous IO below, which may take some time.
+ * Clear the timer to avoid scary "timer expired!" messages;
+ * the "superblock" is written out at least twice below anyway. */
+ del_timer(&device->md_sync_timer);
drbd_al_shrink(device); /* All extents inactive. */
prev_flags = md->flags;
@@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
return 0;
}
-static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
+static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
+ unsigned int max_bio_size)
{
struct request_queue * const q = device->rq_queue;
unsigned int max_hw_sectors = max_bio_size >> 9;
unsigned int max_segments = 0;
struct request_queue *b = NULL;
- if (get_ldev_if_state(device, D_ATTACHING)) {
- b = device->ldev->backing_bdev->bd_disk->queue;
+ if (bdev) {
+ b = bdev->backing_bdev->bd_disk->queue;
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
rcu_read_lock();
@@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
b->backing_dev_info.ra_pages);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
- put_ldev(device);
}
}
-void drbd_reconsider_max_bio_size(struct drbd_device *device)
+void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
{
unsigned int now, new, local, peer;
@@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
- if (get_ldev_if_state(device, D_ATTACHING)) {
- local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+ if (bdev) {
+ local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
device->local_max_bio_size = local;
- put_ldev(device);
}
local = min(local, DRBD_MAX_BIO_SIZE);
@@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
if (new != now)
drbd_info(device, "max BIO size = %u\n", new);
- drbd_setup_queue_param(device, new);
+ drbd_setup_queue_param(device, bdev, new);
}
/* Starts the worker thread */
@@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
+static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
+{
+ return a->disk_barrier != b->disk_barrier ||
+ a->disk_flushes != b->disk_flushes ||
+ a->disk_drain != b->disk_drain;
+}
+
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
@@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
else
set_bit(MD_NO_FUA, &device->flags);
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ if (write_ordering_changed(old_disk_conf, new_disk_conf))
+ drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
drbd_md_sync(device);
@@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
struct drbd_device *device;
+ struct drbd_peer_device *peer_device;
+ struct drbd_connection *connection;
int err;
enum drbd_ret_code retcode;
enum determine_dev_size dd;
@@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device = adm_ctx.device;
mutex_lock(&adm_ctx.resource->adm_mutex);
- conn_reconfig_start(first_peer_device(device)->connection);
+ peer_device = first_peer_device(device);
+ connection = peer_device ? peer_device->connection : NULL;
+ conn_reconfig_start(connection);
/* if you want to reconfigure, please tear down first */
if (device->state.disk > D_DISKLESS) {
@@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
* drbd_ldev_destroy is done already, we may end up here very fast,
* e.g. if someone calls attach from the on-io-error handler,
* to realize a "hot spare" feature (not that I'd recommend that) */
- wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
+ wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
/* make sure there is no leftover from previous force-detach attempts */
clear_bit(FORCE_DETACH, &device->flags);
@@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
goto fail;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
if (nc) {
if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
rcu_read_unlock();
@@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
*/
wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
/* and for any other previously queued work */
- drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
+ drbd_flush_workqueue(&connection->sender_work);
rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
retcode = rv; /* FIXME: Type mismatch. */
@@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
new_disk_conf = NULL;
new_plan = NULL;
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device->read_cnt = 0;
device->writ_cnt = 0;
- drbd_reconsider_max_bio_size(device);
+ drbd_reconsider_max_bio_size(device, device->ldev);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
put_ldev(device);
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
drbd_force_state(device, NS(disk, D_DISKLESS));
drbd_md_sync(device);
fail:
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
if (nbc) {
if (nbc->backing_bdev)
blkdev_put(nbc->backing_bdev,
@@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force)
}
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
+ drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
retcode = drbd_request_state(device, NS(disk, D_FAILED));
drbd_md_put_buffer(device);
/* D_FAILED will transition to DISKLESS. */
@@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
@@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2704,7 +2727,7 @@ out:
return 0;
}
-static int drbd_bmio_set_susp_al(struct drbd_device *device)
+static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
{
int rv;
@@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
@@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc
return list_first_entry(&resource->connections, struct drbd_connection, connections);
}
-int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
+static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
const struct sib_info *sib)
{
struct drbd_resource *resource = device->resource;
@@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
unsigned seq;
int err = -ENOMEM;
- if (sib->sib_reason == SIB_SYNC_PROGRESS) {
- if (time_after(jiffies, device->rs_last_bcast + HZ))
- device->rs_last_bcast = jiffies;
- else
- return;
- }
-
seq = atomic_inc_return(&drbd_genl_seq);
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
if (!msg)
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 89736bdbbc7..06e6147c760 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
seq_printf(seq, "%ld", v);
}
+static void drbd_get_syncer_progress(struct drbd_device *device,
+ union drbd_dev_state state, unsigned long *rs_total,
+ unsigned long *bits_left, unsigned int *per_mil_done)
+{
+ /* this is to break it at compile time when we change that, in case we
+ * want to support more than (1<<32) bits on a 32bit arch. */
+ typecheck(unsigned long, device->rs_total);
+ *rs_total = device->rs_total;
+
+ /* note: both rs_total and rs_left are in bits, i.e. in
+ * units of BM_BLOCK_SIZE.
+ * for the percentage, we don't care. */
+
+ if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
+ *bits_left = device->ov_left;
+ else
+ *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
+ /* >> 10 to prevent overflow,
+ * +1 to prevent division by zero */
+ if (*bits_left > *rs_total) {
+ /* D'oh. Maybe a logic bug somewhere. More likely just a race
+ * between state change and reset of rs_total.
+ */
+ *bits_left = *rs_total;
+ *per_mil_done = *rs_total ? 0 : 1000;
+ } else {
+ /* Make sure the division happens in long context.
+ * We allow up to one petabyte storage right now,
+ * at a granularity of 4k per bit that is 2**38 bits.
+ * After shift right and multiplication by 1000,
+ * this should still fit easily into a 32bit long,
+ * so we don't need a 64bit division on 32bit arch.
+ * Note: currently we don't support such large bitmaps on 32bit
+ * arch anyway, but no harm in being prepared for it here.
+ */
+ unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
+ unsigned long left = *bits_left >> shift;
+ unsigned long total = 1UL + (*rs_total >> shift);
+ unsigned long tmp = 1000UL - left * 1000UL/total;
+ *per_mil_done = tmp;
+ }
+}
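
The per-mille computation deliberately shifts both counters right (by 10, or by 16 for very large bitmaps) before multiplying by 1000, so the whole thing stays within an unsigned long even on 32-bit and never needs a 64-bit division. A standalone sketch of the same arithmetic (constants mirror the comment's reasoning; this is not DRBD code):

#include <limits.h>
#include <stdio.h>

/* per-mille done, computed like drbd_get_syncer_progress():
 * shift first so left * 1000 cannot overflow an unsigned long */
static unsigned int per_mil_done(unsigned long bits_left, unsigned long rs_total)
{
	unsigned int shift;
	unsigned long left, total;

	if (bits_left > rs_total)	/* racy snapshot, clamp */
		return rs_total ? 0 : 1000;

	shift = rs_total > UINT_MAX ? 16 : 10;
	left  = bits_left >> shift;
	total = 1UL + (rs_total >> shift);	/* +1: no division by zero */
	return 1000UL - left * 1000UL / total;
}

int main(void)
{
	/* 1 TiB at 4 KiB per bit = 2^28 bits total, one quarter still to go */
	unsigned long total = 1UL << 28;
	unsigned long left  = total / 4;
	unsigned int res = per_mil_done(left, total);

	printf("%u.%u%% done\n", res / 10, res % 10);
	return 0;
}
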
+
+
/*lge
* progress bars shamelessly adapted from driver/md/md.c
* output looks like
* [=====>..............] 33.5% (23456/123456)
* finish: 2:20:20 speed: 6,345 (6,456) K/sec
*/
-static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq)
+static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
+ union drbd_dev_state state)
{
- unsigned long db, dt, dbdt, rt, rs_left;
+ unsigned long db, dt, dbdt, rt, rs_total, rs_left;
unsigned int res;
int i, x, y;
int stalled = 0;
- drbd_get_syncer_progress(device, &rs_left, &res);
+ drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
x = res/50;
y = 20-x;
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_printf(seq, ".");
seq_printf(seq, "] ");
- if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
+ if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
seq_printf(seq, "verified:");
else
seq_printf(seq, "sync'ed:");
seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
/* if more than a few GB, display in MB */
- if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
+ if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
seq_printf(seq, "(%lu/%lu)M",
(unsigned long) Bit2KB(rs_left >> 10),
- (unsigned long) Bit2KB(device->rs_total >> 10));
+ (unsigned long) Bit2KB(rs_total >> 10));
else
seq_printf(seq, "(%lu/%lu)K\n\t",
(unsigned long) Bit2KB(rs_left),
- (unsigned long) Bit2KB(device->rs_total));
+ (unsigned long) Bit2KB(rs_total));
/* see drivers/md/md.c
* We do not want to overflow, so the order of operands and
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
if (dt == 0)
dt = 1;
- db = device->rs_total - rs_left;
+ db = rs_total - rs_left;
dbdt = Bit2KB(db/dt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, ")");
- if (device->state.conn == C_SYNC_TARGET ||
- device->state.conn == C_VERIFY_S) {
+ if (state.conn == C_SYNC_TARGET ||
+ state.conn == C_VERIFY_S) {
seq_printf(seq, " want: ");
seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
}
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
unsigned long bm_bits = drbd_bm_bits(device);
unsigned long bit_pos;
unsigned long long stop_sector = 0;
- if (device->state.conn == C_VERIFY_S ||
- device->state.conn == C_VERIFY_T) {
+ if (state.conn == C_VERIFY_S ||
+ state.conn == C_VERIFY_T) {
bit_pos = bm_bits - device->ov_left;
if (verify_can_do_stop_sector(device))
stop_sector = device->ov_stop_sector;
@@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
}
-static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
-{
- struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
-
- seq_printf(seq, "%5d %s %s\n", bme->rs_left,
- bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
- bme->flags & BME_LOCKED ? "LOCKED" : "------"
- );
-}
-
static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, prev_i = -1;
const char *sn;
struct drbd_device *device;
struct net_conf *nc;
+ union drbd_dev_state state;
char wp;
static char write_ordering_chars[] = {
@@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
prev_i = i;
- sn = drbd_conn_str(device->state.conn);
+ state = device->state;
+ sn = drbd_conn_str(state.conn);
- if (device->state.conn == C_STANDALONE &&
- device->state.disk == D_DISKLESS &&
- device->state.role == R_SECONDARY) {
+ if (state.conn == C_STANDALONE &&
+ state.disk == D_DISKLESS &&
+ state.role == R_SECONDARY) {
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
/* reset device->congestion_reason */
@@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
"lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
i, sn,
- drbd_role_str(device->state.role),
- drbd_role_str(device->state.peer),
- drbd_disk_str(device->state.disk),
- drbd_disk_str(device->state.pdsk),
+ drbd_role_str(state.role),
+ drbd_role_str(state.peer),
+ drbd_disk_str(state.disk),
+ drbd_disk_str(state.pdsk),
wp,
drbd_suspended(device) ? 's' : 'r',
- device->state.aftr_isp ? 'a' : '-',
- device->state.peer_isp ? 'p' : '-',
- device->state.user_isp ? 'u' : '-',
+ state.aftr_isp ? 'a' : '-',
+ state.peer_isp ? 'p' : '-',
+ state.user_isp ? 'u' : '-',
device->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
device->send_cnt/2,
@@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
atomic_read(&device->unacked_cnt),
atomic_read(&device->ap_bio_cnt),
first_peer_device(device)->connection->epochs,
- write_ordering_chars[first_peer_device(device)->connection->write_ordering]
+ write_ordering_chars[device->resource->write_ordering]
);
seq_printf(seq, " oos:%llu\n",
Bit2KB((unsigned long long)
drbd_bm_total_weight(device)));
}
- if (device->state.conn == C_SYNC_SOURCE ||
- device->state.conn == C_SYNC_TARGET ||
- device->state.conn == C_VERIFY_S ||
- device->state.conn == C_VERIFY_T)
- drbd_syncer_progress(device, seq);
+ if (state.conn == C_SYNC_SOURCE ||
+ state.conn == C_SYNC_TARGET ||
+ state.conn == C_VERIFY_S ||
+ state.conn == C_VERIFY_T)
+ drbd_syncer_progress(device, seq, state);
if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
@@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
put_ldev(device);
}
- if (proc_details >= 2) {
- if (device->resync) {
- lc_seq_dump_details(seq, device->resync, "rs_left",
- resync_dump_detail);
- }
- }
+ if (proc_details >= 2)
+ seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
}
rcu_read_unlock();
@@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file)
int err;
if (try_module_get(THIS_MODULE)) {
- err = single_open(file, drbd_seq_show, PDE_DATA(inode));
+ err = single_open(file, drbd_seq_show, NULL);
if (err)
module_put(THIS_MODULE);
return err;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5b17ec88ea0..9342b8da73a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
goto fail;
}
+ memset(peer_req, 0, sizeof(*peer_req));
+ INIT_LIST_HEAD(&peer_req->w.list);
drbd_clear_interval(&peer_req->i);
peer_req->i.size = data_size;
peer_req->i.sector = sector;
- peer_req->i.local = false;
- peer_req->i.waiting = false;
-
- peer_req->epoch = NULL;
+ peer_req->submit_jif = jiffies;
peer_req->peer_device = peer_device;
peer_req->pages = page;
- atomic_set(&peer_req->pending_bios, 0);
- peer_req->flags = 0;
/*
* The block_id is opaque to the receiver. It is not endianness
* converted, and sent back to the sender unchanged.
@@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
int is_net)
{
+ might_sleep();
if (peer_req->flags & EE_HAS_DIGEST)
kfree(peer_req->digest);
drbd_free_pages(device, peer_req->pages, is_net);
D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
+ if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+ drbd_al_complete_io(device, &peer_req->i);
+ }
mempool_free(peer_req, drbd_ee_mempool);
}
@@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
{
unsigned int header_size = drbd_header_size(connection);
struct packet_info pi;
+ struct net_conf *nc;
int err;
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ if (!nc) {
+ rcu_read_unlock();
+ return -EIO;
+ }
+ sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
+ rcu_read_unlock();
+
err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
if (err != header_size) {
if (err >= 0)
@@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
* drbd_socket_okay() - Free the socket if its connection is not okay
* @sock: pointer to the pointer to the socket.
*/
-static int drbd_socket_okay(struct socket **sock)
+static bool drbd_socket_okay(struct socket **sock)
{
int rr;
char tb[4];
@@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock)
return false;
}
}
+
+static bool connection_established(struct drbd_connection *connection,
+ struct socket **sock1,
+ struct socket **sock2)
+{
+ struct net_conf *nc;
+ int timeout;
+ bool ok;
+
+ if (!*sock1 || !*sock2)
+ return false;
+
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
+ rcu_read_unlock();
+ schedule_timeout_interruptible(timeout);
+
+ ok = drbd_socket_okay(sock1);
+ ok = drbd_socket_okay(sock2) && ok;
+
+ return ok;
+}
+
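
connection_established() keeps the original quirk of writing "ok = drbd_socket_okay(sock2) && ok;" rather than "ok = ok && ...": the second socket is always probed (and torn down if dead) even when the first one has already failed, instead of being short-circuited away. A tiny illustration of the difference (probe() is a made-up stand-in with a side effect we always want to run):

#include <stdbool.h>
#include <stdio.h>

static bool probe(const char *name, bool alive)
{
	printf("probing %s\n", name);
	return alive;
}

int main(void)
{
	bool ok;

	/* both sockets are always probed, even if the first is dead */
	ok = probe("data", false);
	ok = probe("meta", true) && ok;
	printf("both checked, established=%d\n", ok);

	/* the short-circuit version would skip the meta probe entirely */
	ok = probe("data", false) && probe("meta", true);
	printf("short-circuited, established=%d\n", ok);
	return 0;
}
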
/* Gets called if a connection is established, or if a new minor gets created
in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
@@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection)
struct drbd_socket sock, msock;
struct drbd_peer_device *peer_device;
struct net_conf *nc;
- int vnr, timeout, h, ok;
- bool discard_my_data;
+ int vnr, timeout, h;
+ bool discard_my_data, ok;
enum drbd_state_rv rv;
struct accept_wait_data ad = {
.connection = connection,
@@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection)
}
}
- if (sock.socket && msock.socket) {
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- timeout = nc->ping_timeo * HZ / 10;
- rcu_read_unlock();
- schedule_timeout_interruptible(timeout);
- ok = drbd_socket_okay(&sock.socket);
- ok = drbd_socket_okay(&msock.socket) && ok;
- if (ok)
- break;
- }
+ if (connection_established(connection, &sock.socket, &msock.socket))
+ break;
retry:
s = drbd_wait_for_connect(connection, &ad);
@@ -969,8 +996,7 @@ randomize:
goto out_release_sockets;
}
- ok = drbd_socket_okay(&sock.socket);
- ok = drbd_socket_okay(&msock.socket) && ok;
+ ok = connection_established(connection, &sock.socket, &msock.socket);
} while (!ok);
if (ad.s_listen)
@@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection)
struct drbd_peer_device *peer_device;
int vnr;
- if (connection->write_ordering >= WO_bdev_flush) {
+ if (connection->resource->write_ordering >= WO_bdev_flush) {
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
@@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection)
kref_get(&device->kref);
rcu_read_unlock();
+ /* Right now, we have only this one synchronous code path
+ * for flushes between request epochs.
+ * We may want to make those asynchronous,
+ * or at least parallelize the flushes to the volume devices.
+ */
+ device->flush_jif = jiffies;
+ set_bit(FLUSH_PENDING, &device->flags);
rv = blkdev_issue_flush(device->ldev->backing_bdev,
GFP_NOIO, NULL);
+ clear_bit(FLUSH_PENDING, &device->flags);
if (rv) {
drbd_info(device, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */
- drbd_bump_write_ordering(connection, WO_drain_io);
+ drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
}
put_ldev(device);
kref_put(&device->kref, drbd_destroy_device);
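
Each device now records flush_jif and holds FLUSH_PENDING for exactly the duration of the synchronous blkdev_issue_flush(), apparently so the new debugfs code can report how long a flush has been outstanding. The bookkeeping pattern, sketched in user-space with wall-clock time instead of jiffies (issue_flush() is a placeholder for the blocking call):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct dev_state {
	bool flush_pending;
	struct timespec flush_start;	/* stands in for device->flush_jif */
};

static int issue_flush(void)
{
	usleep(20000);		/* pretend the flush takes 20 ms */
	return 0;
}

int main(void)
{
	struct dev_state dev = { 0 };
	struct timespec now;
	int rv;

	clock_gettime(CLOCK_MONOTONIC, &dev.flush_start);
	dev.flush_pending = true;	/* visible to an observer while we block */
	rv = issue_flush();
	dev.flush_pending = false;

	clock_gettime(CLOCK_MONOTONIC, &now);
	printf("flush rv=%d, took ~%ld ms\n", rv,
	       (now.tv_sec - dev.flush_start.tv_sec) * 1000 +
	       (now.tv_nsec - dev.flush_start.tv_nsec) / 1000000);
	return 0;
}
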
@@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
return rv;
}
+static enum write_ordering_e
+max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
+{
+ struct disk_conf *dc;
+
+ dc = rcu_dereference(bdev->disk_conf);
+
+ if (wo == WO_bdev_flush && !dc->disk_flushes)
+ wo = WO_drain_io;
+ if (wo == WO_drain_io && !dc->disk_drain)
+ wo = WO_none;
+
+ return wo;
+}
+
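
max_allowed_wo() clamps the requested write-ordering method against what one backing device's disk_conf allows, stepping down flush -> drain -> none; drbd_bump_write_ordering() then takes the minimum over all attached devices of the resource. A small sketch of that stepping-down with a simplified config struct (not the kernel's disk_conf):

#include <stdio.h>

enum write_ordering { WO_none, WO_drain_io, WO_bdev_flush };

struct dconf {
	int disk_flushes;
	int disk_drain;
};

/* step the requested method down to what this device supports */
static enum write_ordering max_allowed_wo(const struct dconf *dc, enum write_ordering wo)
{
	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;
	return wo;
}

int main(void)
{
	static const char *name[] = { "none", "drain", "flush" };
	struct dconf devs[] = {
		{ .disk_flushes = 1, .disk_drain = 1 },
		{ .disk_flushes = 0, .disk_drain = 1 },	/* flush disabled here */
	};
	enum write_ordering wo = WO_bdev_flush;
	unsigned int i;

	/* the resource ends up with the weakest method any device allows */
	for (i = 0; i < sizeof(devs) / sizeof(devs[0]); i++)
		wo = max_allowed_wo(&devs[i], wo);
	printf("resource write ordering: %s\n", name[wo]);
	return 0;
}
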
/**
* drbd_bump_write_ordering() - Fall back to another write ordering method
* @connection: DRBD connection.
* @wo: Write ordering method to try.
*/
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+ enum write_ordering_e wo)
{
- struct disk_conf *dc;
- struct drbd_peer_device *peer_device;
+ struct drbd_device *device;
enum write_ordering_e pwo;
int vnr;
static char *write_ordering_str[] = {
@@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord
[WO_bdev_flush] = "flush",
};
- pwo = connection->write_ordering;
- wo = min(pwo, wo);
+ pwo = resource->write_ordering;
+ if (wo != WO_bdev_flush)
+ wo = min(pwo, wo);
rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
+ idr_for_each_entry(&resource->devices, device, vnr) {
+ if (get_ldev(device)) {
+ wo = max_allowed_wo(device->ldev, wo);
+ if (device->ldev == bdev)
+ bdev = NULL;
+ put_ldev(device);
+ }
+ }
- if (!get_ldev_if_state(device, D_ATTACHING))
- continue;
- dc = rcu_dereference(device->ldev->disk_conf);
+ if (bdev)
+ wo = max_allowed_wo(bdev, wo);
- if (wo == WO_bdev_flush && !dc->disk_flushes)
- wo = WO_drain_io;
- if (wo == WO_drain_io && !dc->disk_drain)
- wo = WO_none;
- put_ldev(device);
- }
rcu_read_unlock();
- connection->write_ordering = wo;
- if (pwo != connection->write_ordering || wo == WO_bdev_flush)
- drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
+
+ resource->write_ordering = wo;
+ if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+ drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
/**
@@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device,
/* wait for all pending IO completions, before we start
* zeroing things out. */
conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ /* add it to the active list now,
+ * so we can find it to present it in debugfs */
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_SUBMITTED;
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&peer_req->w.list, &device->active_ee);
+ spin_unlock_irq(&device->resource->req_lock);
if (blkdev_issue_zeroout(device->ldev->backing_bdev,
sector, ds >> 9, GFP_NOIO))
peer_req->flags |= EE_WAS_ERROR;
@@ -1398,6 +1455,9 @@ submit:
D_ASSERT(device, page == NULL);
atomic_set(&peer_req->pending_bios, n_bios);
+ /* for debugfs: update timestamp, mark as submitted */
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_SUBMITTED;
do {
bio = bios;
bios = bios->bi_next;
@@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
* R_PRIMARY crashes now.
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
- switch (connection->write_ordering) {
+ switch (connection->resource->write_ordering) {
case WO_none:
if (rv == FE_RECYCLED)
return 0;
@@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
return 0;
default:
- drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
+ drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
+ connection->resource->write_ordering);
return -EIO;
}
@@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
struct drbd_peer_request *peer_req;
struct page *page;
int dgs, ds, err;
- int data_size = pi->size;
+ unsigned int data_size = pi->size;
void *dig_in = peer_device->connection->int_dig_in;
void *dig_vv = peer_device->connection->int_dig_vv;
unsigned long *data;
@@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
if (!peer_req)
return NULL;
+ peer_req->flags |= EE_WRITE;
if (trim)
return peer_req;
@@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
* respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block;
+ peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock);
- list_add(&peer_req->w.list, &device->sync_ee);
+ list_add_tail(&peer_req->w.list, &device->sync_ee);
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
@@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
}
dec_unacked(device);
}
+
/* we delete from the conflict detection hash _after_ we sent out the
* P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device,
drbd_for_each_overlap(i, &device->write_requests, sector, size) {
if (i == &peer_req->i)
continue;
+ if (i->completed)
+ continue;
if (!i->local) {
/*
@@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device,
(unsigned long long)sector, size,
superseded ? "local" : "remote");
- inc_unacked(device);
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
@@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
{
struct drbd_peer_device *peer_device;
struct drbd_device *device;
+ struct net_conf *nc;
sector_t sector;
struct drbd_peer_request *peer_req;
struct p_data *p = pi->data;
@@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
}
peer_req->w.cb = e_end_block;
+ peer_req->submit_jif = jiffies;
+ peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(dp_flags);
@@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
spin_unlock(&connection->epoch_lock);
rcu_read_lock();
- tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
+ nc = rcu_dereference(peer_device->connection->net_conf);
+ tp = nc->two_primaries;
+ if (peer_device->connection->agreed_pro_version < 100) {
+ switch (nc->wire_protocol) {
+ case DRBD_PROT_C:
+ dp_flags |= DP_SEND_WRITE_ACK;
+ break;
+ case DRBD_PROT_B:
+ dp_flags |= DP_SEND_RECEIVE_ACK;
+ break;
+ }
+ }
rcu_read_unlock();
+
+ if (dp_flags & DP_SEND_WRITE_ACK) {
+ peer_req->flags |= EE_SEND_WRITE_ACK;
+ inc_unacked(device);
+ /* corresponding dec_unacked() in e_end_block()
+ * respective _drbd_clear_done_ee */
+ }
+
+ if (dp_flags & DP_SEND_RECEIVE_ACK) {
+ /* I really don't like it that the receiver thread
+ * sends on the msock, but anyways */
+ drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+ }
+
if (tp) {
+ /* two primaries implies protocol C */
+ D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
peer_req->flags |= EE_IN_INTERVAL_TREE;
err = wait_for_and_update_peer_seq(peer_device, peer_seq);
if (err)
@@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
* active_ee to become empty in drbd_submit_peer_request();
* better not add ourselves here. */
if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
- list_add(&peer_req->w.list, &device->active_ee);
+ list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
if (device->state.conn == C_SYNC_TARGET)
wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
- if (peer_device->connection->agreed_pro_version < 100) {
- rcu_read_lock();
- switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
- case DRBD_PROT_C:
- dp_flags |= DP_SEND_WRITE_ACK;
- break;
- case DRBD_PROT_B:
- dp_flags |= DP_SEND_RECEIVE_ACK;
- break;
- }
- rcu_read_unlock();
- }
-
- if (dp_flags & DP_SEND_WRITE_ACK) {
- peer_req->flags |= EE_SEND_WRITE_ACK;
- inc_unacked(device);
- /* corresponding dec_unacked() in e_end_block()
- * respective _drbd_clear_done_ee */
- }
-
- if (dp_flags & DP_SEND_RECEIVE_ACK) {
- /* I really don't like it that the receiver thread
- * sends on the msock, but anyways */
- drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
- }
-
if (device->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
- peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
- drbd_al_begin_io(device, &peer_req->i, true);
+ drbd_al_begin_io(device, &peer_req->i);
+ peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
}
err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
@@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
list_del(&peer_req->w.list);
drbd_remove_epoch_entry_interval(device, peer_req);
spin_unlock_irq(&device->resource->req_lock);
- if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
+ if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
drbd_al_complete_io(device, &peer_req->i);
+ }
out_interrupted:
drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
@@ -2368,13 +2439,14 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+ bool throttle_if_app_is_waiting)
{
struct lc_element *tmp;
- bool throttle = true;
+ bool throttle = drbd_rs_c_min_rate_throttle(device);
- if (!drbd_rs_c_min_rate_throttle(device))
- return false;
+ if (!throttle || throttle_if_app_is_waiting)
+ return throttle;
spin_lock_irq(&device->al_lock);
tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_PRIORITY, &bm_ext->flags))
throttle = false;
- /* Do not slow down if app IO is already waiting for this extent */
+ /* Do not slow down if app IO is already waiting for this extent,
+ * and our progress is necessary for application IO to complete. */
}
spin_unlock_irq(&device->al_lock);
@@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
(int)part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&device->rs_sect_ev);
- if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
+
+ if (atomic_read(&device->ap_actlog_cnt)
+ || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
unsigned long rs_left;
int i;
@@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
peer_req->w.cb = w_e_end_data_req;
fault_type = DRBD_FAULT_DT_RD;
/* application IO, don't drbd_rs_begin_io */
+ peer_req->flags |= EE_APPLICATION;
goto submit;
case P_RS_DATA_REQUEST:
@@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
peer_req->w.cb = w_e_end_csum_rs_req;
/* used in the sector offset progress display */
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ /* remember to report stats in drbd_resync_finished */
+ device->use_csums = true;
} else if (pi->cmd == P_OV_REPLY) {
/* track progress, we may need to throttle */
atomic_add(size >> 9, &device->rs_sect_in);
@@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
* we would also throttle its application reads.
* In that case, throttling is done on the SyncTarget only.
*/
- if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
+
+ /* Even though this may be a resync request, we do add to "read_ee";
+ * "sync_ee" is only used for resync WRITEs.
+ * Add to list early, so debugfs can find this request
+ * even if we have to sleep below. */
+ spin_lock_irq(&device->resource->req_lock);
+ list_add_tail(&peer_req->w.list, &device->read_ee);
+ spin_unlock_irq(&device->resource->req_lock);
+
+ update_receiver_timing_details(connection, drbd_rs_should_slow_down);
+ if (device->state.peer != R_PRIMARY
+ && drbd_rs_should_slow_down(device, sector, false))
schedule_timeout_uninterruptible(HZ/10);
+ update_receiver_timing_details(connection, drbd_rs_begin_io);
if (drbd_rs_begin_io(device, sector))
goto out_free_e;
@@ -2604,22 +2694,20 @@ submit_for_resync:
atomic_add(size >> 9, &device->rs_sect_ev);
submit:
+ update_receiver_timing_details(connection, drbd_submit_peer_request);
inc_unacked(device);
- spin_lock_irq(&device->resource->req_lock);
- list_add_tail(&peer_req->w.list, &device->read_ee);
- spin_unlock_irq(&device->resource->req_lock);
-
if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
return 0;
/* don't care for the reason here */
drbd_err(device, "submit failed, triggering re-connect\n");
+
+out_free_e:
spin_lock_irq(&device->resource->req_lock);
list_del(&peer_req->w.list);
spin_unlock_irq(&device->resource->req_lock);
/* no drbd_rs_complete_io(), we are dropping the connection anyways */
-out_free_e:
put_ldev(device);
drbd_free_peer_req(device, peer_req);
return -EIO;
@@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
-1091 requires proto 91
-1096 requires proto 96
*/
-static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
+static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
u64 self, peer;
int i, j;
@@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
@@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
case 1: /* self_pri && !peer_pri */ return 1;
case 2: /* !self_pri && peer_pri */ return -1;
case 3: /* self_pri && peer_pri */
- dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
+ dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
return dc ? -1 : 1;
}
}
@@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
*rule_nr = 51;
peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
if (self == peer) {
- if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+ if (connection->agreed_pro_version < 96 ?
(device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
(device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
/* The last P_SYNC_UUID did not get through. Undo the last start of
resync as sync source modifications of the peer's UUIDs. */
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
@@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
*rule_nr = 71;
self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
if (self == peer) {
- if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+ if (connection->agreed_pro_version < 96 ?
(device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
(device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
/* The last P_SYNC_UUID did not get through. Undo the last start of
resync as sync source modifications of our UUIDs. */
- if (first_peer_device(device)->connection->agreed_pro_version < 91)
+ if (connection->agreed_pro_version < 91)
return -1091;
__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
@@ -3352,8 +3442,7 @@ disconnect:
* return: NULL (alg name was "")
* ERR_PTR(error) if something goes wrong
* or the crypto hash ptr, if it worked out ok. */
-static
-struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
+static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
const char *alg, const char *name)
{
struct crypto_hash *tfm;
@@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
struct drbd_device *device;
struct p_sizes *p = pi->data;
enum determine_dev_size dd = DS_UNCHANGED;
- sector_t p_size, p_usize, my_usize;
+ sector_t p_size, p_usize, p_csize, my_usize;
int ldsc = 0; /* local disk size changed */
enum dds_flags ddsf;
@@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
p_size = be64_to_cpu(p->d_size);
p_usize = be64_to_cpu(p->u_size);
+ p_csize = be64_to_cpu(p->c_size);
/* just store the peer's disk size for now.
* we still need to figure out whether we accept that. */
@@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
}
device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
- drbd_reconsider_max_bio_size(device);
/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
In case we cleared the QUEUE_FLAG_DISCARD from our queue in
drbd_reconsider_max_bio_size(), we can be sure that after
@@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(device)) {
+ drbd_reconsider_max_bio_size(device, device->ldev);
dd = drbd_determine_dev_size(device, ddsf, NULL);
put_ldev(device);
if (dd == DS_ERROR)
return -EIO;
drbd_md_sync(device);
} else {
- /* I am diskless, need to accept the peer's size. */
- drbd_set_my_capacity(device, p_size);
+ /*
+ * I am diskless, need to accept the peer's *current* size.
+	 * I must NOT accept the peer's backing disk size,
+ * it may have been larger than mine all along...
+ *
+ * At this point, the peer knows more about my disk, or at
+ * least about what we last agreed upon, than myself.
+ * So if his c_size is less than his d_size, the most likely
+ * reason is that *my* d_size was smaller last time we checked.
+ *
+ * However, if he sends a zero current size,
+ * take his (user-capped or) backing disk size anyways.
+ */
+ drbd_reconsider_max_bio_size(device, NULL);
+ drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
}
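	/* Editor's illustrative sketch (not part of this patch): the GNU "?:"
	 * above falls back through the peer-reported sizes, i.e. it is
	 * equivalent to the following (local variable name hypothetical):
	 *
	 *	sector_t new_size = p_csize;	// peer's current agreed size
	 *	if (new_size == 0)
	 *		new_size = p_usize;	// else the user-configured cap
	 *	if (new_size == 0)
	 *		new_size = p_size;	// else the peer's backing disk size
	 *	drbd_set_my_capacity(device, new_size);
	 */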
if (get_ldev(device)) {
@@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection)
struct data_cmd *cmd;
drbd_thread_current_set_cpu(&connection->receiver);
+ update_receiver_timing_details(connection, drbd_recv_header);
if (drbd_recv_header(connection, &pi))
goto err_out;
@@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection)
}
if (shs) {
+ update_receiver_timing_details(connection, drbd_recv_all_warn);
err = drbd_recv_all_warn(connection, pi.data, shs);
if (err)
goto err_out;
pi.size -= shs;
}
+ update_receiver_timing_details(connection, cmd->fn);
err = cmd->fn(connection, &pi);
if (err) {
drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 09803d0d520..c67717d572d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
{
int rw = bio_data_dir(req->master_bio);
- unsigned long duration = jiffies - req->start_time;
+ unsigned long duration = jiffies - req->start_jif;
int cpu;
cpu = part_stat_lock();
part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration);
@@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
{
struct drbd_request *req;
- req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+ req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO);
if (!req)
return NULL;
@@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
+ INIT_LIST_HEAD(&req->req_pending_master_completion);
+ INIT_LIST_HEAD(&req->req_pending_local);
/* one reference to be put by __drbd_make_request */
atomic_set(&req->completion_ref, 1);
@@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
return req;
}
+static void drbd_remove_request_interval(struct rb_root *root,
+ struct drbd_request *req)
+{
+ struct drbd_device *device = req->device;
+ struct drbd_interval *i = &req->i;
+
+ drbd_remove_interval(root, i);
+
+ /* Wake up any processes waiting for this request to complete. */
+ if (i->waiting)
+ wake_up(&device->misc_wait);
+}
+
void drbd_req_destroy(struct kref *kref)
{
struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref)
return;
}
- /* remove it from the transfer log.
- * well, only if it had been there in the first
- * place... if it had not (local only or conflicting
- * and never sent), it should still be "empty" as
- * initialized in drbd_req_new(), so we can list_del() it
- * here unconditionally */
+ /* If called from mod_rq_state (expected normal case) or
+ * drbd_send_and_submit (the less likely normal path), this holds the
+	 * req_lock, and req->tl_requests will typically be on ->transfer_log,
+ * though it may be still empty (never added to the transfer log).
+ *
+ * If called from do_retry(), we do NOT hold the req_lock, but we are
+ * still allowed to unconditionally list_del(&req->tl_requests),
+ * because it will be on a local on-stack list only. */
list_del_init(&req->tl_requests);
+ /* finally remove the request from the conflict detection
+ * respective block_id verification interval tree. */
+ if (!drbd_interval_empty(&req->i)) {
+ struct rb_root *root;
+
+ if (s & RQ_WRITE)
+ root = &device->write_requests;
+ else
+ root = &device->read_requests;
+ drbd_remove_request_interval(root, req);
+ } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
+ drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
+ s, (unsigned long long)req->i.sector, req->i.size);
+
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
* release the reference to the activity log. */
@@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device,
}
-static void drbd_remove_request_interval(struct rb_root *root,
- struct drbd_request *req)
-{
- struct drbd_device *device = req->device;
- struct drbd_interval *i = &req->i;
-
- drbd_remove_interval(root, i);
-
- /* Wake up any processes waiting for this request to complete. */
- if (i->waiting)
- wake_up(&device->misc_wait);
-}
-
/* Helper for __req_mod().
* Set m->bio to the master bio, if it is fit to be completed,
* or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
error = PTR_ERR(req->private_bio);
- /* remove the request from the conflict detection
- * respective block_id verification hash */
- if (!drbd_interval_empty(&req->i)) {
- struct rb_root *root;
-
- if (rw == WRITE)
- root = &device->write_requests;
- else
- root = &device->read_requests;
- drbd_remove_request_interval(root, req);
- }
-
/* Before we can signal completion to the upper layers,
* we may need to close the current transfer log epoch.
* We are within the request lock, so we can simply compare
@@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
m->error = ok ? 0 : (error ?: -EIO);
m->bio = req->master_bio;
req->master_bio = NULL;
+ /* We leave it in the tree, to be able to verify later
+ * write-acks in protocol != C during resync.
+ * But we mark it as "complete", so it won't be counted as
+ * conflict in a multi-primary setup. */
+ req->i.completed = true;
}
+
+ if (req->i.waiting)
+ wake_up(&device->misc_wait);
+
+ /* Either we are about to complete to upper layers,
+ * or we will restart this request.
+ * In either case, the request object will be destroyed soon,
+ * so better remove it from all lists. */
+ list_del_init(&req->req_pending_master_completion);
}
+/* still holds resource->req_lock */
static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
{
struct drbd_device *device = req->device;
@@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
return 1;
}
+static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_next == NULL)
+ connection->req_next = req;
+}
+
+static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_next != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if (s & RQ_NET_QUEUED)
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_next = req;
+}
+
+static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_ack_pending == NULL)
+ connection->req_ack_pending = req;
+}
+
+static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_ack_pending != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_ack_pending = req;
+}
+
+static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_not_net_done == NULL)
+ connection->req_not_net_done = req;
+}
+
+static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ if (!connection)
+ return;
+ if (connection->req_not_net_done != req)
+ return;
+ list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+ const unsigned s = req->rq_state;
+ if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+ break;
+ }
+ if (&req->tl_requests == &connection->transfer_log)
+ req = NULL;
+ connection->req_not_net_done = req;
+}
+
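/* Editor's illustrative note (not part of this patch): the helpers above
 * keep per-connection pointers to the oldest request still in a given
 * state, so the request timer and debugfs code need not walk the whole
 * transfer log. The usage pattern, under resource->req_lock, is roughly:
 *
 *	set_if_null_req_next(peer_device, req);     // req enters RQ_NET_QUEUED
 *	...
 *	advance_conn_req_next(peer_device, req);    // req leaves RQ_NET_QUEUED:
 *	                                            // skip ahead to the next
 *	                                            // still-queued request
 */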
/* I'd like this to be the only place that manipulates
* req->completion_ref and req->kref. */
static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
int clear, int set)
{
struct drbd_device *device = req->device;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
unsigned s = req->rq_state;
int c_put = 0;
int k_put = 0;
@@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
atomic_inc(&req->completion_ref);
}
- if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED))
+ if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
atomic_inc(&req->completion_ref);
+ set_if_null_req_next(peer_device, req);
+ }
if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
kref_get(&req->kref); /* wait for the DONE */
- if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT))
- atomic_add(req->i.size >> 9, &device->ap_in_flight);
+ if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
+ /* potentially already completed in the asender thread */
+ if (!(s & RQ_NET_DONE)) {
+ atomic_add(req->i.size >> 9, &device->ap_in_flight);
+ set_if_null_req_not_net_done(peer_device, req);
+ }
+ if (s & RQ_NET_PENDING)
+ set_if_null_req_ack_pending(peer_device, req);
+ }
if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
atomic_inc(&req->completion_ref);
@@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
++k_put;
else
++c_put;
+ list_del_init(&req->req_pending_local);
}
if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
dec_ap_pending(device);
++c_put;
+ req->acked_jif = jiffies;
+ advance_conn_req_ack_pending(peer_device, req);
}
- if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+ if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
++c_put;
+ advance_conn_req_next(peer_device, req);
+ }
- if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
- if (req->rq_state & RQ_NET_SENT)
+ if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
+ if (s & RQ_NET_SENT)
atomic_sub(req->i.size >> 9, &device->ap_in_flight);
- ++k_put;
+ if (s & RQ_EXP_BARR_ACK)
+ ++k_put;
+ req->net_done_jif = jiffies;
+
+ /* in ahead/behind mode, or just in case,
+ * before we finally destroy this request,
+ * the caching pointers must not reference it anymore */
+ advance_conn_req_next(peer_device, req);
+ advance_conn_req_ack_pending(peer_device, req);
+ advance_conn_req_not_net_done(peer_device, req);
}
/* potentially complete and destroy */
@@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
bdevname(device->ldev->backing_bdev, b));
}
+/* Helper for HANDED_OVER_TO_NETWORK.
+ * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
+ * Is it also still "PENDING"?
+ * --> If so, clear PENDING and set NET_OK below.
+ * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
+ * (and we must not set RQ_NET_OK) */
+static inline bool is_pending_write_protocol_A(struct drbd_request *req)
+{
+ return (req->rq_state &
+ (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
+ == (RQ_WRITE|RQ_NET_PENDING);
+}
+
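/* Editor's illustrative sketch (not part of this patch, function name
 * hypothetical): the mask-and-compare above is equivalent to testing
 * each of the four bits separately: */
static inline bool is_pending_write_protocol_A_expanded(struct drbd_request *req)
{
	const unsigned s = req->rq_state;
	return  (s & RQ_WRITE)			/* it is a write ... */
	    &&  (s & RQ_NET_PENDING)		/* ... still waiting for an ack ... */
	    && !(s & RQ_EXP_WRITE_ACK)		/* ... not protocol C ... */
	    && !(s & RQ_EXP_RECEIVE_ACK);	/* ... not protocol B: protocol A. */
}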
/* obviously this could be coded as many single functions
* instead of one huge switch,
* or by putting the code directly in the respective locations
@@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m)
{
- struct drbd_device *device = req->device;
+ struct drbd_device *const device = req->device;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
struct net_conf *nc;
int p, rv = 0;
@@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
* and from w_read_retry_remote */
D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
p = nc->wire_protocol;
rcu_read_unlock();
req->rq_state |=
@@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
mod_rq_state(req, m, 0, RQ_NET_QUEUED);
req->w.cb = w_send_read_req;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
req->w.cb = w_send_dblock;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
/* close the epoch, in case it outgrew the limit */
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
p = nc->max_epoch_size;
rcu_read_unlock();
- if (first_peer_device(device)->connection->current_tle_writes >= p)
- start_new_tl_epoch(first_peer_device(device)->connection);
+ if (connection->current_tle_writes >= p)
+ start_new_tl_epoch(connection);
break;
case QUEUE_FOR_SEND_OOS:
mod_rq_state(req, m, 0, RQ_NET_QUEUED);
req->w.cb = w_send_out_of_sync;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case HANDED_OVER_TO_NETWORK:
/* assert something? */
- if (bio_data_dir(req->master_bio) == WRITE &&
- !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
+ if (is_pending_write_protocol_A(req))
/* this is what is dangerous about protocol A:
* pretend it was successfully written on the peer. */
- if (req->rq_state & RQ_NET_PENDING)
- mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
- /* else: neg-ack was faster... */
- /* it is still not yet RQ_NET_DONE until the
- * corresponding epoch barrier got acked as well,
- * so we know what to dirty on connection loss */
- }
- mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+ mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
+ RQ_NET_SENT|RQ_NET_OK);
+ else
+ mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+ /* It is still not yet RQ_NET_DONE until the
+ * corresponding epoch barrier got acked as well,
+ * so we know what to dirty on connection loss. */
break;
case OOS_HANDED_TO_NETWORK:
@@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case WRITE_ACKED_BY_PEER_AND_SIS:
req->rq_state |= RQ_NET_SIS;
case WRITE_ACKED_BY_PEER:
- D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
- /* protocol C; successfully written on peer.
+ /* Normal operation protocol C: successfully written on peer.
+ * During resync, even in protocol != C,
+ * we requested an explicit write ack anyways.
+ * Which means we cannot even assert anything here.
* Nothing more to do here.
* We want to keep the tl in place for all protocols, to cater
* for volatile write-back caches on lower level devices. */
-
goto ack_common;
case RECV_ACKED_BY_PEER:
D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
* see also notes above in HANDED_OVER_TO_NETWORK about
* protocol != C */
ack_common:
- D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
break;
@@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
get_ldev(device); /* always succeeds in this call path */
req->w.cb = w_restart_disk_io;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&req->w);
break;
@@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
if (req->w.cb) {
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ /* w.cb expected to be w_send_dblock, or w_send_read_req */
+ drbd_queue_work(&connection->sender_work,
&req->w);
rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
} /* else: FIXME can this happen? */
@@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case QUEUE_AS_DRBD_BARRIER:
- start_new_tl_epoch(first_peer_device(device)->connection);
+ start_new_tl_epoch(connection);
mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
break;
};
@@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device)
connection->agreed_pro_version < 96)
return;
+ if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
+ return; /* nothing to do ... */
+
/* If I don't even have good local storage, we can not reasonably try
* to pull ahead of the peer. We also need the local reference to make
* sure device->act_log is there.
@@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(device)) {
+ req->pre_submit_jif = jiffies;
if (drbd_insert_fault(device,
rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
@@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req)
static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
{
- spin_lock(&device->submit.lock);
+ spin_lock_irq(&device->resource->req_lock);
list_add_tail(&req->tl_requests, &device->submit.writes);
- spin_unlock(&device->submit.lock);
+ list_add_tail(&req->req_pending_master_completion,
+ &device->pending_master_completion[1 /* WRITE */]);
+ spin_unlock_irq(&device->resource->req_lock);
queue_work(device->submit.wq, &device->submit.worker);
+ /* do_submit() may sleep internally on al_wait, too */
+ wake_up(&device->al_wait);
}
/* returns the new drbd_request pointer, if the caller is expected to
@@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
* Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
*/
static struct drbd_request *
-drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
const int rw = bio_data_dir(bio);
struct drbd_request *req;
@@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
bio_endio(bio, -ENOMEM);
return ERR_PTR(-ENOMEM);
}
- req->start_time = start_time;
+ req->start_jif = start_jif;
if (!get_ldev(device)) {
bio_put(req->private_bio);
@@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
if (rw == WRITE && req->private_bio && req->i.size
&& !test_bit(AL_SUSPENDED, &device->flags)) {
if (!drbd_al_begin_io_fastpath(device, &req->i)) {
+ atomic_inc(&device->ap_actlog_cnt);
drbd_queue_write(device, req);
return NULL;
}
req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
}
return req;
@@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
+ struct drbd_resource *resource = device->resource;
const int rw = bio_rw(req->master_bio);
struct bio_and_error m = { NULL, };
bool no_remote = false;
+ bool submit_private_bio = false;
- spin_lock_irq(&device->resource->req_lock);
+ spin_lock_irq(&resource->req_lock);
if (rw == WRITE) {
/* This may temporarily give up the req_lock,
	 * but will re-acquire it before it returns here.
@@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
no_remote = true;
}
+ /* If it took the fast path in drbd_request_prepare, add it here.
+ * The slow path has added it already. */
+ if (list_empty(&req->req_pending_master_completion))
+ list_add_tail(&req->req_pending_master_completion,
+ &device->pending_master_completion[rw == WRITE]);
if (req->private_bio) {
/* needs to be marked within the same spinlock */
+ list_add_tail(&req->req_pending_local,
+ &device->pending_completion[rw == WRITE]);
_req_mod(req, TO_BE_SUBMITTED);
/* but we need to give up the spinlock to submit */
- spin_unlock_irq(&device->resource->req_lock);
- drbd_submit_req_private_bio(req);
- spin_lock_irq(&device->resource->req_lock);
+ submit_private_bio = true;
} else if (no_remote) {
nodata:
if (__ratelimit(&drbd_ratelimit_state))
@@ -1167,15 +1321,23 @@ nodata:
out:
if (drbd_req_put_completion_ref(req, &m, 1))
kref_put(&req->kref, drbd_req_destroy);
- spin_unlock_irq(&device->resource->req_lock);
-
+ spin_unlock_irq(&resource->req_lock);
+
+ /* Even though above is a kref_put(), this is safe.
+ * As long as we still need to submit our private bio,
+ * we hold a completion ref, and the request cannot disappear.
+ * If however this request did not even have a private bio to submit
+ * (e.g. remote read), req may already be invalid now.
+ * That's why we cannot check on req->private_bio. */
+ if (submit_private_bio)
+ drbd_submit_req_private_bio(req);
if (m.bio)
complete_master_bio(device, &m);
}
-void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
- struct drbd_request *req = drbd_request_prepare(device, bio, start_time);
+ struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);
if (IS_ERR_OR_NULL(req))
return;
drbd_send_and_submit(device, req);
@@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
continue;
req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
+ atomic_dec(&device->ap_actlog_cnt);
}
list_del_init(&req->tl_requests);
@@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
struct list_head *incoming,
- struct list_head *pending)
+ struct list_head *pending,
+ struct list_head *later)
{
struct drbd_request *req, *tmp;
int wake = 0;
@@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
spin_lock_irq(&device->al_lock);
list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
err = drbd_al_begin_io_nonblock(device, &req->i);
+ if (err == -ENOBUFS)
+ break;
if (err == -EBUSY)
wake = 1;
if (err)
- continue;
- req->rq_state |= RQ_IN_ACT_LOG;
- list_move_tail(&req->tl_requests, pending);
+ list_move_tail(&req->tl_requests, later);
+ else
+ list_move_tail(&req->tl_requests, pending);
}
spin_unlock_irq(&device->al_lock);
if (wake)
wake_up(&device->al_wait);
-
return !list_empty(pending);
}
+void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+{
+ struct drbd_request *req, *tmp;
+
+ list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+ req->rq_state |= RQ_IN_ACT_LOG;
+ req->in_actlog_jif = jiffies;
+ atomic_dec(&device->ap_actlog_cnt);
+ list_del_init(&req->tl_requests);
+ drbd_send_and_submit(device, req);
+ }
+}
+
void do_submit(struct work_struct *ws)
{
struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
- LIST_HEAD(incoming);
- LIST_HEAD(pending);
- struct drbd_request *req, *tmp;
+ LIST_HEAD(incoming); /* from drbd_make_request() */
+ LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */
+ LIST_HEAD(busy); /* blocked by resync requests */
+
+ /* grab new incoming requests */
+ spin_lock_irq(&device->resource->req_lock);
+ list_splice_tail_init(&device->submit.writes, &incoming);
+ spin_unlock_irq(&device->resource->req_lock);
for (;;) {
- spin_lock(&device->submit.lock);
- list_splice_tail_init(&device->submit.writes, &incoming);
- spin_unlock(&device->submit.lock);
+ DEFINE_WAIT(wait);
+ /* move used-to-be-busy back to front of incoming */
+ list_splice_init(&busy, &incoming);
submit_fast_path(device, &incoming);
if (list_empty(&incoming))
break;
-skip_fast_path:
- wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
- /* Maybe more was queued, while we prepared the transaction?
- * Try to stuff them into this transaction as well.
- * Be strictly non-blocking here, no wait_event, we already
- * have something to commit.
- * Stop if we don't make any more progres.
- */
for (;;) {
+ prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ list_splice_init(&busy, &incoming);
+ prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
+ if (!list_empty(&pending))
+ break;
+
+ schedule();
+
+ /* If all currently "hot" activity log extents are kept busy by
+ * incoming requests, we still must not totally starve new
+ * requests to "cold" extents.
+ * Something left on &incoming means there had not been
+ * enough update slots available, and the activity log
+ * has been marked as "starving".
+ *
+ * Try again now, without looking for new requests,
+ * effectively blocking all new requests until we made
+ * at least _some_ progress with what we currently have.
+ */
+ if (!list_empty(&incoming))
+ continue;
+
+ /* Nothing moved to pending, but nothing left
+ * on incoming: all moved to busy!
+ * Grab new and iterate. */
+ spin_lock_irq(&device->resource->req_lock);
+ list_splice_tail_init(&device->submit.writes, &incoming);
+ spin_unlock_irq(&device->resource->req_lock);
+ }
+ finish_wait(&device->al_wait, &wait);
+
+ /* If the transaction was full, before all incoming requests
+ * had been processed, skip ahead to commit, and iterate
+ * without splicing in more incoming requests from upper layers.
+ *
+ * Else, if all incoming have been processed,
+ * they have become either "pending" (to be submitted after
+ * next transaction commit) or "busy" (blocked by resync).
+ *
+ * Maybe more was queued, while we prepared the transaction?
+ * Try to stuff those into this transaction as well.
+ * Be strictly non-blocking here,
+ * we already have something to commit.
+ *
+		 * Commit if we don't make any more progress.
+ */
+
+ while (list_empty(&incoming)) {
LIST_HEAD(more_pending);
LIST_HEAD(more_incoming);
bool made_progress;
@@ -1260,55 +1485,32 @@ skip_fast_path:
if (list_empty(&device->submit.writes))
break;
- spin_lock(&device->submit.lock);
+ spin_lock_irq(&device->resource->req_lock);
list_splice_tail_init(&device->submit.writes, &more_incoming);
- spin_unlock(&device->submit.lock);
+ spin_unlock_irq(&device->resource->req_lock);
if (list_empty(&more_incoming))
break;
- made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending);
+ made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
list_splice_tail_init(&more_pending, &pending);
list_splice_tail_init(&more_incoming, &incoming);
-
if (!made_progress)
break;
}
- drbd_al_begin_io_commit(device, false);
-
- list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
- list_del_init(&req->tl_requests);
- drbd_send_and_submit(device, req);
- }
- /* If all currently hot activity log extents are kept busy by
- * incoming requests, we still must not totally starve new
- * requests to cold extents. In that case, prepare one request
- * in blocking mode. */
- list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
- list_del_init(&req->tl_requests);
- req->rq_state |= RQ_IN_ACT_LOG;
- if (!drbd_al_begin_io_prepare(device, &req->i)) {
- /* Corresponding extent was hot after all? */
- drbd_send_and_submit(device, req);
- } else {
- /* Found a request to a cold extent.
- * Put on "pending" list,
- * and try to cumulate with more. */
- list_add(&req->tl_requests, &pending);
- goto skip_fast_path;
- }
- }
+ drbd_al_begin_io_commit(device);
+ send_and_submit_pending(device, &pending);
}
}
void drbd_make_request(struct request_queue *q, struct bio *bio)
{
struct drbd_device *device = (struct drbd_device *) q->queuedata;
- unsigned long start_time;
+ unsigned long start_jif;
- start_time = jiffies;
+ start_jif = jiffies;
/*
* what we "blindly" assume:
@@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
inc_ap_bio(device);
- __drbd_make_request(device, bio, start_time);
+ __drbd_make_request(device, bio, start_jif);
}
/* This is called by bio_add_page().
@@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
return limit;
}
-static void find_oldest_requests(
- struct drbd_connection *connection,
- struct drbd_device *device,
- struct drbd_request **oldest_req_waiting_for_peer,
- struct drbd_request **oldest_req_waiting_for_disk)
-{
- struct drbd_request *r;
- *oldest_req_waiting_for_peer = NULL;
- *oldest_req_waiting_for_disk = NULL;
- list_for_each_entry(r, &connection->transfer_log, tl_requests) {
- const unsigned s = r->rq_state;
- if (!*oldest_req_waiting_for_peer
- && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
- *oldest_req_waiting_for_peer = r;
-
- if (!*oldest_req_waiting_for_disk
- && (s & RQ_LOCAL_PENDING) && r->device == device)
- *oldest_req_waiting_for_disk = r;
-
- if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
- break;
- }
-}
-
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
struct drbd_connection *connection = first_peer_device(device)->connection;
- struct drbd_request *req_disk, *req_peer; /* oldest request */
+ struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
struct net_conf *nc;
+ unsigned long oldest_submit_jif;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
@@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data)
return; /* Recurring timer stopped */
now = jiffies;
+ nt = now + et;
spin_lock_irq(&device->resource->req_lock);
- find_oldest_requests(connection, device, &req_peer, &req_disk);
- if (req_peer == NULL && req_disk == NULL) {
- spin_unlock_irq(&device->resource->req_lock);
- mod_timer(&device->request_timer, now + et);
- return;
- }
+ req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
+ req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
+ req_peer = connection->req_not_net_done;
+ /* maybe the oldest request waiting for the peer is in fact still
+ * blocking in tcp sendmsg */
+ if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
+ req_peer = connection->req_next;
+
+ /* evaluate the oldest peer request only in one timer! */
+ if (req_peer && req_peer->device != device)
+ req_peer = NULL;
+
+ /* do we have something to evaluate? */
+ if (req_peer == NULL && req_write == NULL && req_read == NULL)
+ goto out;
+
+ oldest_submit_jif =
+ (req_write && req_read)
+ ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
+ ? req_write->pre_submit_jif : req_read->pre_submit_jif )
+ : req_write ? req_write->pre_submit_jif
+ : req_read ? req_read->pre_submit_jif : now;
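+	/* Editor's illustrative sketch (not part of this patch): the nested
+	 * conditional above is equivalent to
+	 *
+	 *	if (req_write && req_read)
+	 *		oldest_submit_jif = time_before(req_write->pre_submit_jif,
+	 *						req_read->pre_submit_jif)
+	 *			? req_write->pre_submit_jif : req_read->pre_submit_jif;
+	 *	else if (req_write)
+	 *		oldest_submit_jif = req_write->pre_submit_jif;
+	 *	else if (req_read)
+	 *		oldest_submit_jif = req_read->pre_submit_jif;
+	 *	else
+	 *		oldest_submit_jif = now;  // disk-timeout check below is then a no-op
+	 */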
/* The request is considered timed out, if
* - we have some effective timeout from the configuration,
@@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data)
* to expire twice (worst case) to become effective. Good enough.
*/
if (ent && req_peer &&
- time_after(now, req_peer->start_time + ent) &&
+ time_after(now, req_peer->pre_send_jif + ent) &&
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
- if (dt && req_disk &&
- time_after(now, req_disk->start_time + dt) &&
+ if (dt && oldest_submit_jif != now &&
+ time_after(now, oldest_submit_jif + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data)
/* Reschedule timer for the nearest not already expired timeout.
* Fallback to now + min(effective network timeout, disk timeout). */
- ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
- ? req_peer->start_time + ent : now + et;
- dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
- ? req_disk->start_time + dt : now + et;
+ ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
+ ? req_peer->pre_send_jif + ent : now + et;
+ dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
+ ? oldest_submit_jif + dt : now + et;
nt = time_before(ent, dt) ? ent : dt;
+out:
spin_unlock_irq(&connection->resource->req_lock);
mod_timer(&device->request_timer, nt);
}
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 8566cd5866b..9f6a04080e9 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device,
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
+extern void tl_abort_disk_io(struct drbd_device *device);
/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index a5d8aae00e0..c35c0f001bb 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
return rv;
}
-static void print_st(struct drbd_device *device, char *name, union drbd_state ns)
+static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
{
drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
name,
@@ -952,11 +952,12 @@ enum drbd_state_rv
__drbd_set_state(struct drbd_device *device, union drbd_state ns,
enum chg_state_flags flags, struct completion *done)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
union drbd_state os;
enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw;
- bool did_remote, should_do_remote;
os = drbd_read_state(device);
@@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
this happen...*/
if (is_valid_state(device, os) == rv)
- rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+ rv = is_valid_soft_transition(os, ns, connection);
} else
- rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+ rv = is_valid_soft_transition(os, ns, connection);
}
if (rv < SS_SUCCESS) {
@@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
	   sanitize_state(). Only display it here if we were not called from
_conn_request_state() */
if (!(flags & CS_DC_SUSP))
- conn_pr_state_change(first_peer_device(device)->connection, os, ns,
+ conn_pr_state_change(connection, os, ns,
(flags & ~CS_DC_MASK) | CS_DC_SUSP);
/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
@@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
atomic_inc(&device->local_cnt);
- did_remote = drbd_should_do_remote(device->state);
+ if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
+ clear_bit(RS_DONE, &device->flags);
+
+ /* changes to local_cnt and device flags should be visible before
+ * changes to state, which again should be visible before anything else
+ * depending on that change happens. */
+ smp_wmb();
device->state.i = ns.i;
- should_do_remote = drbd_should_do_remote(device->state);
device->resource->susp = ns.susp;
device->resource->susp_nod = ns.susp_nod;
device->resource->susp_fen = ns.susp_fen;
+ smp_wmb();
	/* put replicated vs not-replicated requests in separate epochs */
- if (did_remote != should_do_remote)
- start_new_tl_epoch(first_peer_device(device)->connection);
+ if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
+ drbd_should_do_remote((union drbd_dev_state)ns.i))
+ start_new_tl_epoch(connection);
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
drbd_print_uuids(device, "attached to UUIDs");
/* Wake up role changes, that were delayed because of connection establishing */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
- no_peer_wf_report_params(first_peer_device(device)->connection))
- clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags);
+ no_peer_wf_report_params(connection))
+ clear_bit(STATE_SENT, &connection->flags);
wake_up(&device->misc_wait);
wake_up(&device->state_wait);
- wake_up(&first_peer_device(device)->connection->ping_wait);
+ wake_up(&connection->ping_wait);
/* Aborted verify run, or we reached the stop sector.
* Log the last position, unless end-of-device. */
@@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
/* Receiver should clean up itself */
if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
- drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_stop_nowait(&connection->receiver);
/* Now the receiver finished cleaning up itself, it should die */
if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
- drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_stop_nowait(&connection->receiver);
/* Upon network failure, we need to restart the receiver. */
if (os.conn > C_WF_CONNECTION &&
ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
- drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver);
+ drbd_thread_restart_nowait(&connection->receiver);
/* Resume AL writing if we get a connection */
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
drbd_resume_al(device);
- first_peer_device(device)->connection->connect_cnt++;
+ connection->connect_cnt++;
}
/* remember last attach time so request_timer_fn() won't
@@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
ascw->w.cb = w_after_state_ch;
ascw->device = device;
ascw->done = done;
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&connection->sender_work,
&ascw->w);
} else {
drbd_err(device, "Could not kmalloc an ascw\n");
@@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags)
{
struct drbd_resource *resource = device->resource;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
struct sib_info sib;
sib.sib_reason = SIB_STATE_CHANGE;
sib.os = os;
sib.ns = ns;
- if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+ if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
+ && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
clear_bit(CRASHED_PRIMARY, &device->flags);
if (device->p_uuid)
device->p_uuid[UI_FLAGS] &= ~((u64)2);
@@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
state change. This function might sleep */
if (ns.susp_nod) {
- struct drbd_connection *connection = first_peer_device(device)->connection;
enum drbd_req_event what = NOTHING;
spin_lock_irq(&device->resource->req_lock);
@@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
}
if (ns.susp_fen) {
- struct drbd_connection *connection = first_peer_device(device)->connection;
-
spin_lock_irq(&device->resource->req_lock);
if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
/* case2: The connection was established again: */
@@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
* which is unexpected. */
if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
(ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
- first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) {
- drbd_gen_and_send_sync_uuid(first_peer_device(device));
+ connection->agreed_pro_version >= 96 && get_ldev(device)) {
+ drbd_gen_and_send_sync_uuid(peer_device);
put_ldev(device);
}
@@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
atomic_set(&device->rs_pending_cnt, 0);
drbd_rs_cancel_all(device);
- drbd_send_uuids(first_peer_device(device));
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_uuids(peer_device);
+ drbd_send_state(peer_device, ns);
}
/* No point in queuing send_bitmap if we don't have a connection
* anymore, so check also the _current_ state, not only the new state
@@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
set_bit(NEW_CUR_UUID, &device->flags);
} else {
drbd_uuid_new_current(device);
- drbd_send_uuids(first_peer_device(device));
+ drbd_send_uuids(peer_device);
}
}
put_ldev(device);
@@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(device);
- drbd_send_uuids(first_peer_device(device));
+ drbd_send_uuids(peer_device);
}
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* Last part of the attaching process ... */
if (ns.conn >= C_CONNECTED &&
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
- drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */
- drbd_send_uuids(first_peer_device(device));
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_sizes(peer_device, 0, 0); /* to start sync... */
+ drbd_send_uuids(peer_device);
+ drbd_send_state(peer_device, ns);
}
/* We want to pause/continue resync, tell peer. */
if (ns.conn >= C_CONNECTED &&
((os.aftr_isp != ns.aftr_isp) ||
(os.user_isp != ns.user_isp)))
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* In case one of the isp bits got set, suspend other devices. */
if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* Make sure the peer gets informed about eventual state
changes (ISP bits) while we were in WFReportParams. */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* We are in the progress to start a full sync... */
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
drbd_rs_cancel_all(device);
@@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* corresponding get_ldev in __drbd_set_state
* this may finally trigger drbd_ldev_destroy. */
put_ldev(device);
@@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
	/* Notify peer that I had a local IO error, and did not detach. */
if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* Disks got bigger while they were detached */
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* sync target done with resync. Explicitly notify peer, even though
* it should (at least for non-empty resyncs) already know itself. */
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* Verify finished, or reached stop sector. Peer did not know about
* the stop sector, and we may even have changed the stop sector during
* verify to interrupt/stop early. Send the new state. */
if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
&& verify_can_do_stop_sector(device))
- drbd_send_state(first_peer_device(device), ns);
+ drbd_send_state(peer_device, ns);
/* This triggers bitmap writeout of potentially still unwritten pages
* if the resync finished cleanly, or aborted because of peer disk
@@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
old_conf = connection->net_conf;
connection->my_addr_len = 0;
connection->peer_addr_len = 0;
- rcu_assign_pointer(connection->net_conf, NULL);
+ RCU_INIT_POINTER(connection->net_conf, NULL);
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
@@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
return 0;
}
-void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
+static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
{
enum chg_state_flags flags = ~0;
struct drbd_peer_device *peer_device;
@@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
return rv;
}
-void
+static void
conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
{
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d8f57b6305c..50776b36282 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,13 +67,10 @@ rwlock_t global_state_lock;
*/
void drbd_md_io_complete(struct bio *bio, int error)
{
- struct drbd_md_io *md_io;
struct drbd_device *device;
- md_io = (struct drbd_md_io *)bio->bi_private;
- device = container_of(md_io, struct drbd_device, md_io);
-
- md_io->error = error;
+ device = bio->bi_private;
+ device->md_io.error = error;
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
@@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error)
* ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
*/
drbd_md_put_buffer(device);
- md_io->done = 1;
+ device->md_io.done = 1;
wake_up(&device->misc_wait);
bio_put(bio);
if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
@@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
i = peer_req->i;
do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
block_id = peer_req->block_id;
+ peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
spin_lock_irqsave(&device->resource->req_lock, flags);
device->writ_cnt += peer_req->i.size >> 9;
@@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
if (!get_ldev(device))
return -EIO;
- if (drbd_rs_should_slow_down(device, sector))
- goto defer;
-
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
peer_req->w.cb = w_e_send_csum;
spin_lock_irq(&device->resource->req_lock);
- list_add(&peer_req->w.list, &device->read_ee);
+ list_add_tail(&peer_req->w.list, &device->read_ee);
spin_unlock_irq(&device->resource->req_lock);
atomic_add(size >> 9, &device->rs_sect_ev);
@@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
- if (list_empty(&device->resync_work.list))
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->resync_work);
+ drbd_queue_work_if_unqueued(
+ &first_peer_device(device)->connection->sender_work,
+ &device->resync_work);
}
static void fifo_set(struct fifo_buffer *fb, int value)
@@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
struct disk_conf *dc;
- unsigned int want; /* The number of sectors we want in the proxy */
+ unsigned int want; /* The number of sectors we want in-flight */
int req_sect; /* Number of sectors to request in this turn */
- int correction; /* Number of sectors more we need in the proxy*/
+ int correction; /* Number of sectors more we need in-flight */
int cps; /* correction per invocation of drbd_rs_controller() */
int steps; /* Number of time steps to plan ahead */
int curr_corr;
@@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device)
* potentially causing a distributed deadlock on congestion during
* online-verify or (checksum-based) resync, if max-buffers,
* socket buffer sizes and resync rate settings are mis-configured. */
- if (mxb - device->rs_in_flight < number)
- number = mxb - device->rs_in_flight;
+
+ /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
+ * mxb (as used here, and in drbd_alloc_pages on the peer) is
+ * "number of pages" (typically also 4k),
+ * but "rs_in_flight" is in "sectors" (512 Byte). */
+ if (mxb - device->rs_in_flight/8 < number)
+ number = mxb - device->rs_in_flight/8;
return number;
}
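/* Editor's worked example (not part of this patch): "number" and mxb are
 * in 4 KiB units, while rs_in_flight is in 512-byte sectors, so one 4 KiB
 * request equals 8 sectors. With mxb = 1000 pages, rs_in_flight = 2048
 * sectors, and a controller-computed number = 800:
 *	2048 / 8 = 256 requests' worth already in flight,
 *	1000 - 256 = 744 < 800, so number is clamped to 744. */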
-static int make_resync_request(struct drbd_device *device, int cancel)
+static int make_resync_request(struct drbd_device *const device, int cancel)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
unsigned long bit;
sector_t sector;
const sector_t capacity = drbd_get_capacity(device->this_bdev);
int max_bio_size;
int number, rollback_i, size;
- int align, queued, sndbuf;
+ int align, requeue = 0;
int i = 0;
if (unlikely(cancel))
@@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel)
goto requeue;
for (i = 0; i < number; i++) {
- /* Stop generating RS requests, when half of the send buffer is filled */
- mutex_lock(&first_peer_device(device)->connection->data.mutex);
- if (first_peer_device(device)->connection->data.socket) {
- queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
- sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
- } else {
- queued = 1;
- sndbuf = 0;
- }
- mutex_unlock(&first_peer_device(device)->connection->data.mutex);
- if (queued > sndbuf / 2)
+ /* Stop generating RS requests when half of the send buffer is filled,
+ * but notify TCP that we'd like to have more space. */
+ mutex_lock(&connection->data.mutex);
+ if (connection->data.socket) {
+ struct sock *sk = connection->data.socket->sk;
+ int queued = sk->sk_wmem_queued;
+ int sndbuf = sk->sk_sndbuf;
+ if (queued > sndbuf / 2) {
+ requeue = 1;
+ if (sk->sk_socket)
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ } else
+ requeue = 1;
+ mutex_unlock(&connection->data.mutex);
+ if (requeue)
goto requeue;
next_sector:
@@ -642,8 +649,7 @@ next_sector:
sector = BM_BIT_TO_SECT(bit);
- if (drbd_rs_should_slow_down(device, sector) ||
- drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(device, sector)) {
device->bm_resync_fo = bit;
goto requeue;
}
@@ -696,9 +702,9 @@ next_sector:
/* adjust very last sectors, in case we are oddly sized */
if (sector + (size>>9) > capacity)
size = (capacity-sector)<<9;
- if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
- first_peer_device(device)->connection->csums_tfm) {
- switch (read_for_csum(first_peer_device(device), sector, size)) {
+
+ if (device->use_csums) {
+ switch (read_for_csum(peer_device, sector, size)) {
case -EIO: /* Disk failure */
put_ldev(device);
return -EIO;
@@ -717,7 +723,7 @@ next_sector:
int err;
inc_rs_pending(device);
- err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
+ err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
sector, size, ID_SYNCER);
if (err) {
drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
@@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
size = BM_BLOCK_SIZE;
- if (drbd_rs_should_slow_down(device, sector) ||
- drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(device, sector)) {
device->ov_position = sector;
goto requeue;
}
@@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device)
if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
khelper_cmd = "after-resync-target";
- if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
+ if (device->use_csums && device->rs_total) {
const unsigned long s = device->rs_same_csum;
const unsigned long t = device->rs_total;
const int ratio =
@@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
/* this time, no connection->send.current_epoch_writes++;
* If it was sent, it was the closing barrier for the last
@@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
* No more barriers will be sent, until we leave AHEAD mode again. */
maybe_send_barrier(connection, req->epoch);
- err = drbd_send_out_of_sync(first_peer_device(device), req);
+ err = drbd_send_out_of_sync(peer_device, req);
req_mod(req, OOS_HANDED_TO_NETWORK);
return err;
@@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
re_init_if_first_write(connection, req->epoch);
maybe_send_barrier(connection, req->epoch);
connection->send.current_epoch_writes++;
- err = drbd_send_dblock(first_peer_device(device), req);
+ err = drbd_send_dblock(peer_device, req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
return err;
@@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
struct drbd_device *device = req->device;
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device->connection;
int err;
if (unlikely(cancel)) {
req_mod(req, SEND_CANCELED);
return 0;
}
+ req->pre_send_jif = jiffies;
/* Even read requests may close a write epoch,
* if there was any yet. */
maybe_send_barrier(connection, req->epoch);
- err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
+ err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
(unsigned long)req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
@@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
- drbd_al_begin_io(device, &req->i, false);
+ drbd_al_begin_io(device, &req->i);
drbd_req_make_private_bio(req, req->master_bio);
req->private_bio->bi_bdev = device->ldev->backing_bdev;
@@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device)
void start_resync_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
-
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
- &device->start_resync_work);
+ drbd_device_post_work(device, RS_START);
}
-int w_start_resync(struct drbd_work *w, int cancel)
+static void do_start_resync(struct drbd_device *device)
{
- struct drbd_device *device =
- container_of(w, struct drbd_device, start_resync_work);
-
if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
- drbd_warn(device, "w_start_resync later...\n");
+ drbd_warn(device, "postponing start_resync ...\n");
device->start_resync_timer.expires = jiffies + HZ/10;
add_timer(&device->start_resync_timer);
- return 0;
+ return;
}
drbd_start_resync(device, C_SYNC_SOURCE);
clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
- return 0;
+}
+
+static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
+{
+ bool csums_after_crash_only;
+ rcu_read_lock();
+ csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
+ rcu_read_unlock();
+ return connection->agreed_pro_version >= 89 && /* supported? */
+ connection->csums_tfm && /* configured? */
+ (csums_after_crash_only == 0 /* use for each resync? */
+ || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
}
/**
@@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel)
*/
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
union drbd_state ns;
int r;
@@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
if (r > 0) {
drbd_info(device, "before-resync-target handler returned %d, "
"dropping connection.\n", r);
- conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
+ conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
return;
}
} else /* C_SYNC_SOURCE */ {
@@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
} else {
drbd_info(device, "before-resync-source handler returned %d, "
"dropping connection.\n", r);
- conn_request_state(first_peer_device(device)->connection,
+ conn_request_state(connection,
NS(conn, C_DISCONNECTING), CS_HARD);
return;
}
@@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
}
- if (current == first_peer_device(device)->connection->worker.task) {
+ if (current == connection->worker.task) {
/* The worker should not sleep waiting for state_mutex,
that can take long */
if (!mutex_trylock(device->state_mutex)) {
@@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
device->rs_mark_time[i] = now;
}
_drbd_pause_after(device);
+ /* Forget potentially stale cached per resync extent bit-counts.
+ * Open coded drbd_rs_cancel_all(device), we already have IRQs
+ * disabled, and know the disk state is ok. */
+ spin_lock(&device->al_lock);
+ lc_reset(device->resync);
+ device->resync_locked = 0;
+ device->resync_wenr = LC_FREE;
+ spin_unlock(&device->al_lock);
}
write_unlock(&global_state_lock);
spin_unlock_irq(&device->resource->req_lock);
if (r == SS_SUCCESS) {
+ wake_up(&device->al_wait); /* for lc_reset() above */
/* reset rs_last_bcast when a resync or verify is started,
* to deal with potential jiffies wrap. */
device->rs_last_bcast = jiffies - HZ;
@@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
drbd_conn_str(ns.conn),
(unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
(unsigned long) device->rs_total);
- if (side == C_SYNC_TARGET)
+ if (side == C_SYNC_TARGET) {
device->bm_resync_fo = 0;
+ device->use_csums = use_checksum_based_resync(connection, device);
+ } else {
+ device->use_csums = 0;
+ }
/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
* with w_send_oos, or the sync target will get confused as to
@@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
* drbd_resync_finished from here in that case.
* We drbd_gen_and_send_sync_uuid here for protocol < 96,
* and from after_state_ch otherwise. */
- if (side == C_SYNC_SOURCE &&
- first_peer_device(device)->connection->agreed_pro_version < 96)
- drbd_gen_and_send_sync_uuid(first_peer_device(device));
+ if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
+ drbd_gen_and_send_sync_uuid(peer_device);
- if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
- device->rs_total == 0) {
+ if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
/* This still has a race (about when exactly the peers
* detect connection loss) that can lead to a full sync
* on next handshake. In 8.3.9 we fixed this with explicit
@@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
int timeo;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
@@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
mutex_unlock(device->state_mutex);
}
+static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
+{
+ struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
+ device->rs_last_bcast = jiffies;
+
+ if (!get_ldev(device))
+ return;
+
+ drbd_bm_write_lazy(device, 0);
+ if (resync_done && is_sync_state(device->state.conn))
+ drbd_resync_finished(device);
+
+ drbd_bcast_event(device, &sib);
+ /* update timestamp, in case it took a while to write out stuff */
+ device->rs_last_bcast = jiffies;
+ put_ldev(device);
+}
+
+static void drbd_ldev_destroy(struct drbd_device *device)
+{
+ lc_destroy(device->resync);
+ device->resync = NULL;
+ lc_destroy(device->act_log);
+ device->act_log = NULL;
+ __no_warn(local,
+ drbd_free_ldev(device->ldev);
+ device->ldev = NULL;);
+ clear_bit(GOING_DISKLESS, &device->flags);
+ wake_up(&device->misc_wait);
+}
+
+static void go_diskless(struct drbd_device *device)
+{
+ D_ASSERT(device, device->state.disk == D_FAILED);
+ /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
+ * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
+ * the protected members anymore, though, so once put_ldev reaches zero
+ * again, it will be safe to free them. */
+
+ /* Try to write changed bitmap pages, read errors may have just
+ * set some bits outside the area covered by the activity log.
+ *
+ * If we have an IO error during the bitmap writeout,
+ * we will want a full sync next time, just in case.
+ * (Do we want a specific meta data flag for this?)
+ *
+ * If that does not make it to stable storage either,
+ * we cannot do anything about that anymore.
+ *
+ * We still need to check if both bitmap and ldev are present, we may
+ * end up here after a failed attach, before ldev was even assigned.
+ */
+ if (device->bitmap && device->ldev) {
+ /* An interrupted resync or similar is allowed to recount bits
+ * while we detach.
+ * Any modifications would not be expected anymore, though.
+ */
+ if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
+ "detach", BM_LOCKED_TEST_ALLOWED)) {
+ if (test_bit(WAS_READ_ERROR, &device->flags)) {
+ drbd_md_set_flag(device, MDF_FULL_SYNC);
+ drbd_md_sync(device);
+ }
+ }
+ }
+
+ drbd_force_state(device, NS(disk, D_DISKLESS));
+}
+
+static int do_md_sync(struct drbd_device *device)
+{
+ drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+ drbd_md_sync(device);
+ return 0;
+}
+
+/* only called from drbd_worker thread, no locking */
+void __update_timing_details(
+ struct drbd_thread_timing_details *tdp,
+ unsigned int *cb_nr,
+ void *cb,
+ const char *fn, const unsigned int line)
+{
+ unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
+ struct drbd_thread_timing_details *td = tdp + i;
+
+ td->start_jif = jiffies;
+ td->cb_addr = cb;
+ td->caller_fn = fn;
+ td->line = line;
+ td->cb_nr = *cb_nr;
+
+ i = (i+1) % DRBD_THREAD_DETAILS_HIST;
+ td = tdp + i;
+ memset(td, 0, sizeof(*td));
+
+ ++(*cb_nr);
+}
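A stand-alone sketch (not kernel code) of the timing-history ring buffer maintained above: each call records into slot nr % HIST and zeroes the following slot so a reader can tell where the newest entry is. HIST and the struct fields are illustrative stand-ins for DRBD_THREAD_DETAILS_HIST and struct drbd_thread_timing_details.

#include <stdio.h>
#include <string.h>

#define HIST 4				/* illustrative history depth */

struct timing { unsigned int nr; const char *fn; };

static void record(struct timing *ring, unsigned int *nr, const char *fn)
{
	unsigned int i = *nr % HIST;

	ring[i].nr = *nr;
	ring[i].fn = fn;
	memset(&ring[(i + 1) % HIST], 0, sizeof(ring[0]));	/* sentinel slot */
	++(*nr);
}

int main(void)
{
	struct timing ring[HIST] = { { 0 } };
	unsigned int nr = 0;

	record(ring, &nr, "wait_for_work");
	record(ring, &nr, "do_unqueued_work");

	for (unsigned int i = 0; i < HIST; i++)
		printf("slot %u: nr=%u fn=%s\n", i, ring[i].nr,
		       ring[i].fn ? ring[i].fn : "(empty)");
	return 0;
}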
+
+#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit))
+static void do_device_work(struct drbd_device *device, const unsigned long todo)
+{
+ if (WORK_PENDING(MD_SYNC, todo))
+ do_md_sync(device);
+ if (WORK_PENDING(RS_DONE, todo) ||
+ WORK_PENDING(RS_PROGRESS, todo))
+ update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
+ if (WORK_PENDING(GO_DISKLESS, todo))
+ go_diskless(device);
+ if (WORK_PENDING(DESTROY_DISK, todo))
+ drbd_ldev_destroy(device);
+ if (WORK_PENDING(RS_START, todo))
+ do_start_resync(device);
+}
+
+#define DRBD_DEVICE_WORK_MASK \
+ ((1UL << GO_DISKLESS) \
+ |(1UL << DESTROY_DISK) \
+ |(1UL << MD_SYNC) \
+ |(1UL << RS_START) \
+ |(1UL << RS_PROGRESS) \
+ |(1UL << RS_DONE) \
+ )
+
+static unsigned long get_work_bits(unsigned long *flags)
+{
+ unsigned long old, new;
+ do {
+ old = *flags;
+ new = old & ~DRBD_DEVICE_WORK_MASK;
+ } while (cmpxchg(flags, old, new) != old);
+ return old & DRBD_DEVICE_WORK_MASK;
+}
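The cmpxchg() loop above atomically grabs and clears only the device-work bits while leaving unrelated flag bits untouched. A stand-alone sketch of the same fetch-and-clear pattern, using compiler builtins in place of the kernel's cmpxchg() and an illustrative mask value:

#include <stdio.h>

#define WORK_MASK 0x3fUL		/* hypothetical: six work bits */

static unsigned long get_work_bits_sketch(unsigned long *flags)
{
	unsigned long old, new;

	do {
		old = *flags;
		new = old & ~WORK_MASK;		/* clear only the work bits */
	} while (!__atomic_compare_exchange_n(flags, &old, new, 0,
					      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
	return old & WORK_MASK;			/* report which work bits were pending */
}

int main(void)
{
	unsigned long flags = 0x145;		/* work bits plus unrelated flag bits */
	unsigned long todo = get_work_bits_sketch(&flags);

	printf("todo=%#lx remaining flags=%#lx\n", todo, flags);
	return 0;
}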
+
+static void do_unqueued_work(struct drbd_connection *connection)
+{
+ struct drbd_peer_device *peer_device;
+ int vnr;
+
+ rcu_read_lock();
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+ struct drbd_device *device = peer_device->device;
+ unsigned long todo = get_work_bits(&device->flags);
+ if (!todo)
+ continue;
+
+ kref_get(&device->kref);
+ rcu_read_unlock();
+ do_device_work(device, todo);
+ kref_put(&device->kref, drbd_destroy_device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
spin_lock_irq(&queue->q_lock);
- list_splice_init(&queue->q, work_list);
+ list_splice_tail_init(&queue->q, work_list);
spin_unlock_irq(&queue->q_lock);
return !list_empty(work_list);
}
@@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
/* dequeue single item only,
* we still use drbd_queue_work_front() in some places */
if (!list_empty(&connection->sender_work.q))
- list_move(connection->sender_work.q.next, work_list);
+ list_splice_tail_init(&connection->sender_work.q, work_list);
spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
if (!list_empty(work_list) || signal_pending(current)) {
spin_unlock_irq(&connection->resource->req_lock);
@@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
if (send_barrier)
maybe_send_barrier(connection,
connection->send.current_epoch_nr + 1);
+
+ if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
+ break;
+
+ /* drbd_send() may have called flush_signals() */
+ if (get_t_state(&connection->worker) != RUNNING)
+ break;
+
schedule();
/* may be woken up for other things but new work, too,
* e.g. if the current epoch got closed.
@@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi)
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- /* as long as we use drbd_queue_work_front(),
- * we may only dequeue single work items here, not batches. */
- if (list_empty(&work_list))
+ if (list_empty(&work_list)) {
+ update_worker_timing_details(connection, wait_for_work);
wait_for_work(connection, &work_list);
+ }
+
+ if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+ update_worker_timing_details(connection, do_unqueued_work);
+ do_unqueued_work(connection);
+ }
if (signal_pending(current)) {
flush_signals(current);
@@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi)
while (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
+ update_worker_timing_details(connection, w->cb);
if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
continue;
if (connection->cstate >= C_WF_REPORT_PARAMS)
@@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi)
}
do {
+ if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+ update_worker_timing_details(connection, do_unqueued_work);
+ do_unqueued_work(connection);
+ }
while (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
+ update_worker_timing_details(connection, w->cb);
w->cb(w, 1);
}
dequeue_work_batch(&connection->sender_work, &work_list);
- } while (!list_empty(&work_list));
+ } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f63d358f3d9..0a581400de0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -15,17 +15,22 @@
#include <linux/numa.h>
#define PART_BITS 4
+#define VQ_NAME_LEN 16
static int major;
static DEFINE_IDA(vd_index_ida);
static struct workqueue_struct *virtblk_wq;
+struct virtio_blk_vq {
+ struct virtqueue *vq;
+ spinlock_t lock;
+ char name[VQ_NAME_LEN];
+} ____cacheline_aligned_in_smp;
+
struct virtio_blk
{
struct virtio_device *vdev;
- struct virtqueue *vq;
- spinlock_t vq_lock;
/* The disk structure for the kernel. */
struct gendisk *disk;
@@ -47,6 +52,10 @@ struct virtio_blk
/* Ida index - used to track minor number allocations. */
int index;
+
+ /* num of vqs */
+ int num_vqs;
+ struct virtio_blk_vq *vqs;
};
struct virtblk_req
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
bool req_done = false;
+ int qid = vq->index;
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
- spin_lock_irqsave(&vblk->vq_lock, flags);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
do {
virtqueue_disable_cb(vq);
- while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+ while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
blk_mq_complete_request(vbr->req);
req_done = true;
}
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq)
/* In case queue is stopped waiting for more buffers. */
if (req_done)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
unsigned long flags;
unsigned int num;
+ int qid = hctx->queue_num;
const bool last = (req->cmd_flags & REQ_END) != 0;
int err;
bool notify = false;
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
}
- spin_lock_irqsave(&vblk->vq_lock, flags);
- err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
+ err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
if (err) {
- virtqueue_kick(vblk->vq);
+ virtqueue_kick(vblk->vqs[qid].vq);
blk_mq_stop_hw_queue(hctx);
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
/* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC)
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
return BLK_MQ_RQ_QUEUE_ERROR;
}
- if (last && virtqueue_kick_prepare(vblk->vq))
+ if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
notify = true;
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
if (notify)
- virtqueue_notify(vblk->vq);
+ virtqueue_notify(vblk->vqs[qid].vq);
return BLK_MQ_RQ_QUEUE_OK;
}
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev)
static int init_vq(struct virtio_blk *vblk)
{
int err = 0;
+ int i;
+ vq_callback_t **callbacks;
+ const char **names;
+ struct virtqueue **vqs;
+ unsigned short num_vqs;
+ struct virtio_device *vdev = vblk->vdev;
+
+ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
+ struct virtio_blk_config, num_queues,
+ &num_vqs);
+ if (err)
+ num_vqs = 1;
+
+ vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+ if (!vblk->vqs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+ if (!names)
+ goto err_names;
+
+ callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+ if (!callbacks)
+ goto err_callbacks;
+
+ vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+ if (!vqs)
+ goto err_vqs;
- /* We expect one virtqueue, for output. */
- vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
- if (IS_ERR(vblk->vq))
- err = PTR_ERR(vblk->vq);
+ for (i = 0; i < num_vqs; i++) {
+ callbacks[i] = virtblk_done;
+ snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+ names[i] = vblk->vqs[i].name;
+ }
+
+ /* Discover virtqueues and write information to configuration. */
+ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+ if (err)
+ goto err_find_vqs;
+ for (i = 0; i < num_vqs; i++) {
+ spin_lock_init(&vblk->vqs[i].lock);
+ vblk->vqs[i].vq = vqs[i];
+ }
+ vblk->num_vqs = num_vqs;
+
+ err_find_vqs:
+ kfree(vqs);
+ err_vqs:
+ kfree(callbacks);
+ err_callbacks:
+ kfree(names);
+ err_names:
+ if (err)
+ kfree(vblk->vqs);
+ out:
return err;
}
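A stand-alone sketch (not the driver; names and error codes illustrative) of the unwind structure used by init_vq() above: scratch arrays needed only during setup are freed on every exit path, while the long-lived per-queue array is kept on success and freed only when an error is pending.

#include <stdio.h>
#include <stdlib.h>

struct blk { int *vqs; int num_vqs; };

static int init_sketch(struct blk *b, int num, int fail_discovery)
{
	int err = 0;
	int *scratch;			/* stands in for names/callbacks/vqs */

	b->vqs = malloc(sizeof(*b->vqs) * num);
	if (!b->vqs)
		return -1;

	scratch = malloc(sizeof(*scratch) * num);
	if (!scratch) {
		err = -1;
		goto err_scratch;
	}

	if (fail_discovery)		/* stands in for find_vqs() failing */
		err = -1;
	else
		b->num_vqs = num;

	free(scratch);			/* freed on success and on failure */
err_scratch:
	if (err) {			/* long-lived state freed only on error */
		free(b->vqs);
		b->vqs = NULL;
	}
	return err;
}

int main(void)
{
	struct blk b = { 0, 0 };

	printf("ok: %d\n", init_sketch(&b, 4, 0));
	free(b.vqs);
	printf("fail: %d\n", init_sketch(&b, 4, 1));
	return 0;
}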
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk);
if (err)
goto out_free_vblk;
- spin_lock_init(&vblk->vq_lock);
/* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS);
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Default queue sizing is to fill the ring. */
if (!virtblk_queue_depth) {
- virtblk_queue_depth = vblk->vq->num_free;
+ virtblk_queue_depth = vblk->vqs[0].vq->num_free;
/* ... but without indirect descs, we use 2 descs per req */
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
virtblk_queue_depth /= 2;
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev)
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
vblk->tag_set.ops = &virtio_mq_ops;
- vblk->tag_set.nr_hw_queues = 1;
vblk->tag_set.queue_depth = virtblk_queue_depth;
vblk->tag_set.numa_node = NUMA_NO_NODE;
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev)
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
vblk->tag_set.driver_data = vblk;
+ vblk->tag_set.nr_hw_queues = vblk->num_vqs;
err = blk_mq_alloc_tag_set(&vblk->tag_set);
if (err)
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev)
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk);
vdev->config->del_vqs(vdev);
+ kfree(vblk->vqs);
kfree(vblk);
/* Only free device id if we don't have any users */
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
- VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
+ VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
+ VIRTIO_BLK_F_MQ,
};
static struct virtio_driver virtio_blk = {
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 8bc422977b5..4ff86878727 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -499,8 +499,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
}
/* Lookup the i2c hwclock */
- for (hwclock = NULL;
- (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
+ for_each_node_by_name(hwclock, "i2c-hwclock") {
const char *loc = of_get_property(hwclock,
"hwctrl-location", NULL);
if (loc == NULL)
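A stand-alone sketch (not the kernel API; the real of_find_node_by_name()/for_each_node_by_name() walk the global device tree) of what such an iterator macro hides: repeated calls to a "find next" helper until it returns NULL, replacing the open-coded for (;;) loop removed above.

#include <stdio.h>
#include <string.h>

struct node { const char *name; struct node *next; };

static struct node *find_node_by_name(struct node *from, const char *name,
				      struct node *head)
{
	struct node *n = from ? from->next : head;

	for (; n; n = n->next)
		if (!strcmp(n->name, name))
			return n;
	return NULL;
}

#define for_each_node_named(pos, name, head) \
	for (pos = find_node_by_name(NULL, name, head); pos; \
	     pos = find_node_by_name(pos, name, head))

int main(void)
{
	struct node c = { "memory", NULL }, b = { "cpu", &c }, a = { "memory", &b };
	struct node *np;

	for_each_node_named(np, "memory", &a)
		printf("found %s node at %p\n", np->name, (void *)np);
	return 0;
}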
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 544f6d327ed..061407d5952 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop)
goto error_out;
}
- /* Set ptr to new property if provided */
- if (new_prop) {
- /* Single property */
- if (!strncmp(new_prop->name, "status", new_prop->length)) {
- status = new_prop;
-
- } else if (!strncmp(new_prop->name, "ibm,max-sg-len",
- new_prop->length)) {
- maxsglen = new_prop;
-
- } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
- new_prop->length)) {
- maxsyncop = new_prop;
-
- } else {
- /*
- * Skip the update, the property being updated
- * has no impact.
- */
- goto out;
- }
- }
+ /*
+ * If this is a property update, there are only certain properties that
+ * we care about. Bail if it isn't in the below list
+ */
+ if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) &&
+ strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) &&
+ strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+ goto out;
/* Perform property updates */
ret = nx842_OF_upd_status(new_devdata, status);
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 374b57fc596..a12c8552f6a 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -134,8 +134,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci)
int j;
u32 nr_pages;
- for (np = NULL;
- (np = of_find_node_by_name(np, "memory")) != NULL;) {
+ for_each_node_by_name(np, "memory") {
struct resource r;
/* We "know" that the Cell firmware only creates one entry
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index d3d0e8cf27b..d6c767ace91 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -382,6 +382,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index ca8430f9256..e67b9a50ac7 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -1085,6 +1085,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 22e0c926989..126516414c1 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
dev_err(&client->dev,
"invalid gain on %s\n",
node->full_name);
+ return -EINVAL;
}
}
@@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
dev_err(&client->dev,
"invalid data_rate on %s\n",
node->full_name);
+ return -EINVAL;
}
}
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index f96063680e5..272fcc837ec 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -510,6 +510,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4ae3fff13f4..bea0a344fab 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -247,8 +247,8 @@ struct dme1737_data {
u8 pwm_acz[3];
u8 pwm_freq[6];
u8 pwm_rr[2];
- u8 zone_low[3];
- u8 zone_abs[3];
+ s8 zone_low[3];
+ s8 zone_abs[3];
u8 zone_hyst[2];
u32 alarms;
};
@@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res)
return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2));
}
-static inline int IN_TO_REG(int val, int nominal)
+static inline int IN_TO_REG(long val, int nominal)
{
return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255);
}
@@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res)
return (reg * 1000) >> (res - 8);
}
-static inline int TEMP_TO_REG(int val)
+static inline int TEMP_TO_REG(long val)
{
return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127);
}
@@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg)
return TEMP_RANGE[(reg >> 4) & 0x0f];
}
-static int TEMP_RANGE_TO_REG(int val, int reg)
+static int TEMP_RANGE_TO_REG(long val, int reg)
{
int i;
@@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix)
return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000;
}
-static inline int TEMP_HYST_TO_REG(int val, int ix, int reg)
+static inline int TEMP_HYST_TO_REG(long val, int ix, int reg)
{
int hyst = clamp_val((val + 500) / 1000, 0, 15);
@@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc)
return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg;
}
-static inline int FAN_TO_REG(int val, int tpc)
+static inline int FAN_TO_REG(long val, int tpc)
{
if (tpc) {
return clamp_val(val / tpc, 0, 0xffff);
@@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg)
return (edge > 0) ? 1 << (edge - 1) : 0;
}
-static inline int FAN_TYPE_TO_REG(int val, int reg)
+static inline int FAN_TYPE_TO_REG(long val, int reg)
{
int edge = (val == 4) ? 3 : val;
@@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg)
return 1000 + i * 500;
}
-static int FAN_MAX_TO_REG(int val)
+static int FAN_MAX_TO_REG(long val)
{
int i;
@@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg)
return acz[(reg >> 5) & 0x07];
}
-static inline int PWM_ACZ_TO_REG(int val, int reg)
+static inline int PWM_ACZ_TO_REG(long val, int reg)
{
int acz = (val == 4) ? 2 : val - 1;
@@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg)
return PWM_FREQ[reg & 0x0f];
}
-static int PWM_FREQ_TO_REG(int val, int reg)
+static int PWM_FREQ_TO_REG(long val, int reg)
{
int i;
@@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix)
return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0;
}
-static int PWM_RR_TO_REG(int val, int ix, int reg)
+static int PWM_RR_TO_REG(long val, int ix, int reg)
{
int i;
@@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix)
return PWM_RR_FROM_REG(reg, ix) ? 1 : 0;
}
-static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg)
+static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg)
{
int en = (ix == 1) ? 0x80 : 0x08;
@@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct dme1737_data *data = dev_get_drvdata(dev);
- long val;
+ unsigned long val;
int err;
- err = kstrtol(buf, 10, &val);
+ err = kstrtoul(buf, 10, &val);
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
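A stand-alone sketch (plain user-space C, not driver code) of the parse-then-range-check pattern these hwmon fixes add: the VRM value lands in a u8, so anything above 255 must be rejected instead of being silently truncated. strtoul() stands in for kstrtoul().

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int parse_vrm(const char *buf, unsigned char *vrm)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(buf, &end, 10);		/* stands in for kstrtoul() */
	if (errno || end == buf)
		return -EINVAL;
	if (val > 255)				/* would silently truncate in a u8 */
		return -EINVAL;
	*vrm = val;
	return 0;
}

int main(void)
{
	unsigned char vrm;

	printf("%d\n", parse_vrm("91", &vrm));	/* 0, vrm = 91 */
	printf("%d\n", parse_vrm("300", &vrm));	/* -EINVAL */
	return 0;
}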
diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c
index e87da902f3a..ada90716448 100644
--- a/drivers/hwmon/emc6w201.c
+++ b/drivers/hwmon/emc6w201.c
@@ -252,12 +252,12 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *devattr,
if (err < 0)
return err;
- val /= 1000;
+ val = DIV_ROUND_CLOSEST(val, 1000);
reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr)
: EMC6W201_REG_TEMP_HIGH(nr);
mutex_lock(&data->update_lock);
- data->temp[sf][nr] = clamp_val(val, -127, 128);
+ data->temp[sf][nr] = clamp_val(val, -127, 127);
err = emc6w201_write8(client, reg, data->temp[sf][nr]);
mutex_unlock(&data->update_lock);
diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c
index 0e01c4e13e3..7b73d2002d3 100644
--- a/drivers/hwmon/hih6130.c
+++ b/drivers/hwmon/hih6130.c
@@ -238,6 +238,9 @@ static int hih6130_probe(struct i2c_client *client,
hih6130->client = client;
mutex_init(&hih6130->lock);
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_QUICK))
+ hih6130->write_length = 1;
+
hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
hih6130,
hih6130_groups);
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index ba1d83d4805..a5e295826ae 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -617,6 +617,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index d2060e245ff..cfaf70b9cba 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -74,12 +74,9 @@ static inline int TEMP_FROM_REG(s16 reg)
return reg / 8 * 625 / 10;
}
-static inline s16 TEMP_TO_REG(int val)
+static inline s16 TEMP_TO_REG(long val)
{
- if (val <= -60000)
- return -60000 * 10 / 625 * 8;
- if (val >= 160000)
- return 160000 * 10 / 625 * 8;
+ val = clamp_val(val, -60000, 160000);
return val * 10 / 625 * 8;
}
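A stand-alone sketch of the LM92 conversion above (0.0625 degC per LSB, value stored left-shifted by 3 in the register): clamping the input replaces the two hand-written limit checks, and the round trip through the register stays exact for in-range values. Function names here are illustrative.

#include <stdio.h>

static long clamp_val(long v, long lo, long hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

static short temp_to_reg(long val)		/* millidegrees C -> register */
{
	val = clamp_val(val, -60000, 160000);
	return val * 10 / 625 * 8;
}

static long temp_from_reg(short reg)		/* register -> millidegrees C */
{
	return reg / 8 * 625 / 10;
}

int main(void)
{
	printf("%d\n", temp_to_reg(25000));	/* 3200 */
	printf("%ld\n", temp_from_reg(3200));	/* 25000 */
	printf("%d\n", temp_to_reg(999999));	/* clamped to 160000 -> 20480 */
	return 0;
}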
@@ -206,10 +203,12 @@ static ssize_t set_temp_hyst(struct device *dev,
if (err)
return err;
+ val = clamp_val(val, -120000, 220000);
mutex_lock(&data->update_lock);
- data->temp[t_hyst] = TEMP_FROM_REG(data->temp[attr->index]) - val;
+ data->temp[t_hyst] =
+ TEMP_TO_REG(TEMP_FROM_REG(data->temp[attr->index]) - val);
i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST,
- TEMP_TO_REG(data->temp[t_hyst]));
+ data->temp[t_hyst]);
mutex_unlock(&data->update_lock);
return count;
}
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 988181e4cfc..145f674c1d8 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -615,6 +615,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c
index c74d2da389d..e42964f07f6 100644
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -131,13 +131,6 @@ static int tmp103_probe(struct i2c_client *client,
struct regmap *regmap;
int ret;
- if (!i2c_check_functionality(client->adapter,
- I2C_FUNC_SMBUS_BYTE_DATA)) {
- dev_err(&client->dev,
- "adapter doesn't support SMBus byte transactions\n");
- return -ENODEV;
- }
-
regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config);
if (IS_ERR(regmap)) {
dev_err(dev, "failed to allocate register map\n");
diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c
index 344b22ec255..3ea57c3504e 100644
--- a/drivers/hwmon/vt1211.c
+++ b/drivers/hwmon/vt1211.c
@@ -879,6 +879,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index c1726be3654..2f55973a8c4 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -820,6 +820,9 @@ store_vrm_reg(struct device *dev, struct device_attribute *attr, const char *buf
err = kstrtoul(buf, 10, &val);
if (err)
return err;
+
+ if (val > 255)
+ return -EINVAL;
data->vrm = val;
return count;
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index cb3765fec98..001df856913 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -1181,6 +1181,9 @@ static ssize_t store_vrm_reg(struct device *dev,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 9d63d71214c..816aa6caf5d 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -353,6 +353,9 @@ store_vrm(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ if (val > 255)
+ return -EINVAL;
+
data->vrm = val;
return count;
}
diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index 70637d23b1f..3612cb5b30b 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig
@@ -10,7 +10,7 @@ menu "Hardware Spinlock drivers"
config HWSPINLOCK_OMAP
tristate "OMAP Hardware Spinlock device"
- depends on ARCH_OMAP4 || SOC_OMAP5
+ depends on ARCH_OMAP4 || SOC_OMAP5 || SOC_DRA7XX || SOC_AM33XX || SOC_AM43XX
select HWSPINLOCK
help
Say y here to support the OMAP Hardware Spinlock device (firstly
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index 292869cc903..c1e2cd4d85f 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -98,10 +98,29 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
if (!io_base)
return -ENOMEM;
+ /*
+ * make sure the module is enabled and clocked before reading
+ * the module SYSSTATUS register
+ */
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(&pdev->dev);
+ goto iounmap_base;
+ }
+
/* Determine number of locks */
i = readl(io_base + SYSSTATUS_OFFSET);
i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET;
+ /*
+ * runtime PM will make sure the clock of this module is
+ * enabled again iff at least one lock is requested
+ */
+ ret = pm_runtime_put(&pdev->dev);
+ if (ret < 0)
+ goto iounmap_base;
+
/* one of the four lsb's must be set, and nothing else */
if (hweight_long(i & 0xf) != 1 || i > 8) {
ret = -EINVAL;
@@ -121,12 +140,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
- /*
- * runtime PM will make sure the clock of this module is
- * enabled iff at least one lock is requested
- */
- pm_runtime_enable(&pdev->dev);
-
ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
pdata->base_id, num_locks);
if (ret)
@@ -135,9 +148,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
return 0;
reg_fail:
- pm_runtime_disable(&pdev->dev);
kfree(bank);
iounmap_base:
+ pm_runtime_disable(&pdev->dev);
iounmap(io_base);
return ret;
}
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5af64f..f6d29614cb0 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
- printk(KERN_ERR SPFX "Unable to find port agent\n");
+ dev_err(&device->dev, "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
PTR_ERR(ah));
return;
}
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
- printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+ dev_err(&device->dev, "ib_create_send_mad error\n");
goto err1;
}
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
}
if (ib_post_send_mad(send_buf, NULL)) {
- printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+ dev_err(&device->dev, "ib_post_send_mad error\n");
goto err2;
}
return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[0] = ib_register_mad_agent(device, port_num,
IB_QPT_SMI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[0])) {
ret = PTR_ERR(port_priv->agent[0]);
goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[1] = ib_register_mad_agent(device, port_num,
IB_QPT_GSI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[1])) {
ret = PTR_ERR(port_priv->agent[1]);
goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b..e28a494e2a3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
struct cm_port *port;
struct ib_mad_reg_req reg_req = {
.mgmt_class = IB_MGMT_CLASS_CM,
- .mgmt_class_version = IB_CM_CLASS_VERSION
+ .mgmt_class_version = IB_CM_CLASS_VERSION,
};
struct ib_port_modify port_modify = {
.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
0,
cm_send_handler,
cm_recv_handler,
- port);
+ port,
+ 0);
if (IS_ERR(port->mad_agent))
goto error2;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489ab73..ff9163dc159 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/sysctl.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
struct list_head free_list;
};
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+ {
+ .procname = "default_backlog",
+ .data = &default_backlog,
+ .maxlen = sizeof(default_backlog),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
/*
* The following services provide a mechanism for pre-allocating iwcm_work
* elements. The design pre-allocates them based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ if (!backlog)
+ backlog = default_backlog;
+
ret = alloc_work_entries(cm_id_priv, backlog);
if (ret)
return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
if (!iwcm_wq)
return -ENOMEM;
+ iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+ iwcm_ctl_table);
+ if (!iwcm_ctl_table_hdr) {
+ pr_err("iw_cm: couldn't register sysctl paths\n");
+ destroy_workqueue(iwcm_wq);
+ return -ENOMEM;
+ }
+
return 0;
}
static void __exit iw_cm_cleanup(void)
{
+ unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f136d04..74c30f4c557 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
* SOFTWARE.
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
- void *context)
+ void *context,
+ u32 registration_flags)
{
struct ib_mad_port_private *port_priv;
struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
- if (qpn == -1)
+ if (qpn == -1) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid QP Type %d\n",
+ qp_type);
goto error1;
+ }
- if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+ if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid RMPP Version %u\n",
+ rmpp_version);
goto error1;
+ }
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
- if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+ if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid Class Version %u\n",
+ mad_reg_req->mgmt_class_version);
goto error1;
- if (!recv_handler)
+ }
+ if (!recv_handler) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: no recv_handler\n");
goto error1;
+ }
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
/*
* IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
* one in this range currently allowed
*/
if (mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else if (mad_reg_req->mgmt_class == 0) {
/*
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0\n");
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
* If class is in "new" vendor range,
* ensure supplied OUI is not zero
*/
- if (!is_vendor_oui(mad_reg_req->oui))
+ if (!is_vendor_oui(mad_reg_req->oui)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: No OUI specified for class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
- if (rmpp_version)
+ if (rmpp_version) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
+
/* Make sure class supplied is consistent with QP type */
if (qp_type == IB_QPT_SMI) {
if ((mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else {
if ((mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
} else {
/* No registration request supplied */
if (!send_handler)
goto error1;
+ if (registration_flags & IB_MAD_USER_RMPP)
+ goto error1;
}
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
+ dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0 */
if (!port_priv->qp_info[qpn].qp) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: QP %d not supported\n", qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
mad_agent_priv->agent.context = context;
mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
mad_agent_priv->agent.port_num = port_num;
+ mad_agent_priv->agent.flags = registration_flags;
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
IB_SMI_DISCARD) {
ret = -EINVAL;
- printk(KERN_ERR PFX "Invalid directed route\n");
+ dev_err(&device->dev, "Invalid directed route\n");
goto out;
}
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for local response MAD\n");
+ dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
- "alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
+ dev_err(&send_buf->mad_agent->device->dev,
+ "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
return 0;
}
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+ return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
pad = get_pad_size(hdr_len, data_len);
message_size = hdr_len + data_len + pad;
- if ((!mad_agent->rmpp_version &&
- (rmpp_active || message_size > sizeof(struct ib_mad))) ||
- (!rmpp_active && message_size > sizeof(struct ib_mad)))
- return ERR_PTR(-EINVAL);
+ if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+ if (!rmpp_active && message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
+ } else
+ if (rmpp_active || message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
@@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
&mad_agent_priv->send_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_send_rmpp_mad(mad_send_wr);
if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
ret = ib_send_mad(mad_send_wr);
@@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp);
int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
struct ib_wc *wc)
{
- printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ dev_err(&mad_agent->device->dev,
+ "ib_process_mad_wc() not implemented yet\n");
return 0;
}
EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
- printk(KERN_ERR PFX "Method %d already in use\n", i);
+ pr_err("Method %d already in use\n", i);
return -EINVAL;
}
}
@@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
if (!*method) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_method_table\n");
+ pr_err("No memory for ib_mad_mgmt_method_table\n");
return -ENOMEM;
}
@@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
if (!vendor) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class_table\n");
goto error1;
}
@@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
if (!vendor_class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class\n");
goto error2;
}
@@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto check_in_use;
}
}
- printk(KERN_ERR PFX "All OUI slots in use\n");
+ dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
goto error3;
check_in_use:
@@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
if (mad_agent->agent.recv_handler)
atomic_inc(&mad_agent->refcount);
else {
- printk(KERN_NOTICE PFX "No receive handler for client "
- "%p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
mad_agent = NULL;
}
}
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
/* Make sure MAD base version is understood */
if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
- printk(KERN_ERR PFX "MAD received with unsupported base "
- "version %d\n", mad->mad_hdr.base_version);
+ pr_err("MAD received with unsupported base version %d\n",
+ mad->mad_hdr.base_version);
goto out;
}
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
return !mad_agent_priv->agent.rmpp_version ||
+ !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE) ||
(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
mad_recv_wc);
if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
if (!mad_send_wr) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- ib_free_recv_mad(mad_recv_wc);
- deref_mad_agent(mad_agent_priv);
- return;
- }
- ib_mark_mad_done(mad_send_wr);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+ && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+ & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ /* user rmpp is in effect
+ * and this is an active RMPP MAD
+ */
+ mad_recv_wc->wc->wr_id = 0;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+ } else {
+ /* not user rmpp, revert to normal behavior and
+ * drop the mad */
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+ } else {
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- /* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ /* Defined behavior is to complete response before request */
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
- mad_send_wc.status = IB_WC_SUCCESS;
- mad_send_wc.vendor_err = 0;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ }
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!response) {
- printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
- "for response buffer\n");
+ dev_err(&port_priv->device->dev,
+ "ib_mad_recv_done_handler no memory for response buffer\n");
goto out;
}
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
mad_agent_priv = mad_send_wr->mad_agent_priv;
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
if (ret == IB_RMPP_RESULT_CONSUMED)
goto done;
@@ -2176,7 +2249,8 @@ retry:
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
&bad_send_wr);
if (ret) {
- printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
wc->status = IB_WC_LOC_QP_OP_ERR;
goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
IB_QP_STATE | IB_QP_CUR_STATE);
kfree(attr);
if (ret)
- printk(KERN_ERR PFX "mad_error_handler - "
- "ib_modify_qp to RTS : %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "mad_error_handler - ib_modify_qp to RTS : %d\n",
+ ret);
else
mark_sends_for_retry(qp_info);
}
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
if (local->mad_priv) {
recv_mad_agent = local->recv_mad_agent;
if (!recv_mad_agent) {
- printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ dev_err(&mad_agent_priv->agent.device->dev,
+ "No receive MAD agent for local completion\n");
free_mad = 1;
goto local_send_completion;
}
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
- if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
switch (ret) {
case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!mad_priv) {
- printk(KERN_ERR PFX "No memory for receive buffer\n");
+ dev_err(&qp_info->port_priv->device->dev,
+ "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof mad_priv->header,
DMA_FROM_DEVICE);
kmem_cache_free(ib_mad_cache, mad_priv);
- printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ dev_err(&qp_info->port_priv->device->dev,
+ "ib_post_recv failed: %d\n", ret);
break;
}
} while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
- printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't kmalloc ib_qp_attr\n");
return -ENOMEM;
}
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "INIT: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to INIT: %d\n",
+ i, ret);
goto out;
}
attr->qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTR: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTR: %d\n",
+ i, ret);
goto out;
}
@@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr->sq_psn = IB_MAD_SEND_Q_PSN;
ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTS: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTS: %d\n",
+ i, ret);
goto out;
}
}
ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
if (ret) {
- printk(KERN_ERR PFX "Failed to request completion "
- "notification: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "Failed to request completion notification: %d\n",
+ ret);
goto out;
}
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
if (ret) {
- printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't post receive WRs\n");
goto out;
}
}
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
struct ib_mad_qp_info *qp_info = qp_context;
/* It's worse than that! He's dead, Jim! */
- printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ dev_err(&qp_info->port_priv->device->dev,
+ "Fatal error (%d) on MAD QP (%d)\n",
event->event, qp_info->qp->qp_num);
}
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
qp_init_attr.event_handler = qp_event_handler;
qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
if (IS_ERR(qp_info->qp)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
- get_spl_qp_index(qp_type));
+ dev_err(&qp_info->port_priv->device->dev,
+ "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
ret = PTR_ERR(qp_info->qp);
goto error;
}
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_port_private\n");
return -ENOMEM;
}
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
if (IS_ERR(port_priv->cq)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
goto error3;
}
port_priv->pd = ib_alloc_pd(device);
if (IS_ERR(port_priv->pd)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error4;
}
port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(port_priv->mr)) {
- printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
ret = PTR_ERR(port_priv->mr);
goto error5;
}
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
ret = ib_mad_port_start(port_priv);
if (ret) {
- printk(KERN_ERR PFX "Couldn't start port\n");
+ dev_err(&device->dev, "Couldn't start port\n");
goto error9;
}
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
for (i = start; i <= end; i++) {
if (ib_mad_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
if (ib_agent_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't open port %d for agents\n", i);
goto error_agent;
}
}
@@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device)
error_agent:
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
error:
i--;
while (i >= start) {
if (ib_agent_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n", i);
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
i--;
}
}
@@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device)
}
for (i = 0; i < num_ports; i++, cur_port++) {
if (ib_agent_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, cur_port);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n",
+ cur_port);
if (ib_mad_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ dev_err(&device->dev, "Couldn't close port %d\n",
+ cur_port);
}
}
@@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void)
SLAB_HWCACHE_ALIGN,
NULL);
if (!ib_mad_cache) {
- printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ pr_err("Couldn't create ib_mad cache\n");
ret = -ENOMEM;
goto error1;
}
@@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void)
INIT_LIST_HEAD(&ib_mad_port_list);
if (ib_register_client(&mad_client)) {
- printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ pr_err("Couldn't register ib_mad client\n");
ret = -EINVAL;
goto error2;
}
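The hunks above replace direct rmpp_version checks with ib_mad_kernel_rmpp_agent(), matching the new registration-flags argument threaded through ib_register_mad_agent(). A minimal sketch of what such a helper is assumed to look like — the flags field and the IB_MAD_USER_RMPP bit are assumptions, not shown in this diff:

/* Sketch only: assumes struct ib_mad_agent carries the registration flags
 * and that IB_MAD_USER_RMPP marks agents that handle RMPP in user space.
 */
static inline int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
{
	/* Kernel-side RMPP applies only when an RMPP version was requested
	 * and the agent did not opt into user-space RMPP handling. */
	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
}

With this split, the receive path above hands active RMPP MADs straight to the agent's recv_handler when RMPP is user-handled, and keeps the existing kernel reassembly path otherwise.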
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4969c..d1a0b0ee944 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -42,9 +42,6 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
-
-#define PFX "ib_mad: "
-
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf541f5..c38f030f0dc 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
- recv_handler, sa_dev);
+ recv_handler, sa_dev, 0);
if (IS_ERR(sa_dev->port[i].agent))
goto err;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb9910055..928cdd20e2d 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
* SOFTWARE.
*/
+#define pr_fmt(fmt) "user_mad: " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
- if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
- copy_offset = IB_MGMT_MAD_HDR;
- rmpp_active = 0;
- } else {
+
+ if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
- IB_MGMT_RMPP_FLAG_ACTIVE;
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ } else {
+ copy_offset = IB_MGMT_MAD_HDR;
+ rmpp_active = 0;
}
data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad->mad_hdr.tid = *tid;
}
- spin_lock_irq(&file->send_lock);
- ret = is_duplicate(file, packet);
- if (!ret)
+ if (!ib_mad_kernel_rmpp_agent(agent)
+ && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
- spin_unlock_irq(&file->send_lock);
- if (ret) {
- ret = -EINVAL;
- goto err_msg;
+ spin_unlock_irq(&file->send_lock);
+ } else {
+ spin_lock_irq(&file->send_lock);
+ ret = is_duplicate(file, packet);
+ if (!ret)
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_msg;
+ }
}
ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
}
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid QPN %d specified\n",
+ ureq.qpn);
ret = -EINVAL;
goto out;
}
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
found:
if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
ureq.mgmt_class ? &req : NULL,
ureq.rmpp_version,
- send_handler, recv_handler, file);
+ send_handler, recv_handler, file, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
agent = NULL;
@@ -673,10 +694,11 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- printk(KERN_WARNING "user_mad: process %s did not enable "
- "P_Key index support.\n", current->comm);
- printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
- "has info on the new ABI.\n");
+ dev_warn(file->port->dev,
+ "process %s did not enable P_Key index support.\n",
+ current->comm);
+ dev_warn(file->port->dev,
+ " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -694,6 +716,119 @@ out:
return ret;
}
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+ struct ib_user_mad_reg_req2 ureq;
+ struct ib_mad_reg_req req;
+ struct ib_mad_agent *agent = NULL;
+ int agent_id;
+ int ret;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid device\n");
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid QPN %d specified\n",
+ ureq.qpn);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ ret = -EINVAL;
+
+ if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+ (u32 __user *) (arg + offsetof(struct
+ ib_user_mad_reg_req2, flags))))
+ ret = -EFAULT;
+
+ goto out;
+ }
+
+ for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+ if (!__get_agent(file, agent_id))
+ goto found;
+
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
+ ret = -ENOMEM;
+ goto out;
+
+found:
+ if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
+ req.mgmt_class = ureq.mgmt_class;
+ req.mgmt_class_version = ureq.mgmt_class_version;
+ if (ureq.oui & 0xff000000) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ ureq.oui);
+ ret = -EINVAL;
+ goto out;
+ }
+ req.oui[2] = ureq.oui & 0x0000ff;
+ req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+ req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+ memcpy(req.method_mask, ureq.method_mask,
+ sizeof(req.method_mask));
+ }
+
+ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+ ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+ ureq.mgmt_class ? &req : NULL,
+ ureq.rmpp_version,
+ send_handler, recv_handler, file,
+ ureq.flags);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ agent = NULL;
+ goto out;
+ }
+
+ if (put_user(agent_id,
+ (u32 __user *)(arg +
+ offsetof(struct ib_user_mad_reg_req2, id)))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (!file->already_used) {
+ file->already_used = 1;
+ file->use_pkey_index = 1;
+ }
+
+ file->agent[agent_id] = agent;
+ ret = 0;
+
+out:
+ mutex_unlock(&file->mutex);
+
+ if (ret && agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
+
static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
{
struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
default:
return -ENOIOCTLCMD;
}
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
default:
return -ENOIOCTLCMD;
}
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(void)
+static int find_overflow_devnum(struct ib_device *device)
{
int ret;
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ dev_err(&device->dev,
+ "couldn't register dynamic device number\n");
return ret;
}
}
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- devnum = find_overflow_devnum();
+ devnum = find_overflow_devnum(device);
if (devnum < 0)
return -1;
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register device number\n");
+ pr_err("couldn't register device number\n");
goto out;
}
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
- printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
ret = class_create_file(umad_class, &class_attr_abi_version.attr);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ pr_err("couldn't create abi_version attribute\n");
goto out_class;
}
ret = ib_register_client(&umad_client);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ pr_err("couldn't register ib_umad client\n");
goto out_class;
}
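For context on the IB_USER_MAD_REGISTER_AGENT2 ioctl wired up above, a hedged user-space sketch follows; the header path and the IB_USER_MAD_USER_RMPP flag name are assumptions, only the struct and ioctl names appear in this diff.

#include <string.h>
#include <sys/ioctl.h>
#include <rdma/ib_user_mad.h>	/* assumed location of the reg_req2 ABI */

/* Register a GSI agent that handles RMPP itself; returns the agent id or -1.
 * umad_fd is an open /dev/infiniband/umadN file descriptor. */
static int register_umad_agent2(int umad_fd, __u8 mgmt_class, __u8 class_version)
{
	struct ib_user_mad_reg_req2 req;

	memset(&req, 0, sizeof(req));
	req.qpn = 1;				/* only 0 (SMI) or 1 (GSI) are accepted */
	req.mgmt_class = mgmt_class;
	req.mgmt_class_version = class_version;
	req.flags = IB_USER_MAD_USER_RMPP;	/* assumed capability bit name */

	if (ioctl(umad_fd, IB_USER_MAD_REGISTER_AGENT2, &req))
		return -1;	/* on bad flags the kernel writes back the supported set */
	return req.id;		/* kernel fills in the assigned agent id */
}

Unknown flag bits are rejected against IB_USER_MAD_REG_FLAGS_CAP, as handled in ib_umad_reg_agent2() above.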
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a0..643c08a025a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bc..0600c50e621 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
return ret;
}
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_rereg_mr cmd;
+ struct ib_uverbs_rereg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr;
+ struct ib_pd *old_pd;
+ int ret;
+ struct ib_uobject *uobj;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ return -EINVAL;
+
+ if ((cmd.flags & IB_MR_REREG_TRANS) &&
+ (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+ file->ucontext);
+
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ if (cmd.flags & IB_MR_REREG_ACCESS) {
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ goto put_uobjs;
+ }
+
+ if (cmd.flags & IB_MR_REREG_PD) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+ }
+
+ if (atomic_read(&mr->usecnt)) {
+ ret = -EBUSY;
+ goto put_uobj_pd;
+ }
+
+ old_pd = mr->pd;
+ ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd, &udata);
+ if (!ret) {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
+ }
+ } else {
+ goto put_uobj_pd;
+ }
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+ else
+ ret = in_len;
+
+put_uobj_pd:
+ if (cmd.flags & IB_MR_REREG_PD)
+ put_pd_read(pd);
+
+put_uobjs:
+
+ put_uobj_write(mr->uobject);
+
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
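The IB_MR_REREG_TRANS branch of ib_uverbs_rereg_mr() above only accepts a new translation when the user virtual address and the HCA virtual address share the same offset within a page, since the memory is pinned and mapped in whole pages. A stand-alone restatement of that check (helper name is illustrative):

#include <linux/mm.h>	/* PAGE_MASK */
#include <linux/types.h>

/* Illustrative only: mirrors the IB_MR_REREG_TRANS sanity checks above. */
static bool rereg_trans_args_valid(u64 start, u64 hca_va, u64 length)
{
	if (!start || !hca_va || !length)
		return false;
	/* start and hca_va must have the same offset inside a page. */
	return (start & ~PAGE_MASK) == (hca_va & ~PAGE_MASK);
}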
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338..c73b22a257f 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
[IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e8533f7..1b63185b4ad 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
mq->msg_pool.host, dma_unmap_addr(mq, mapping));
}
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
- int msg_size)
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
+ size_t q_size, size_t msg_size)
{
u8 *pool_start;
+ if (q_size > SIZE_MAX / msg_size)
+ return -EINVAL;
+
pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
&mq->host_dma, GFP_KERNEL);
if (!pool_start)
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051af25..c9df0549f51 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
chp = get_chp(dev, qid);
if (chp) {
+ t4_clear_cq_armed(&chp->cq);
spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc02bc..41cd6882b64 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
struct c4iw_cq *schp)
{
int count;
- int flushed;
+ int rq_flushed, sq_flushed;
unsigned long flag;
PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
c4iw_flush_hw_cq(rchp);
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+ rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&rchp->comp_handler_lock, flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
- }
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, flag);
spin_lock(&qhp->lock);
if (schp != rchp)
c4iw_flush_hw_cq(schp);
- flushed = c4iw_flush_sq(qhp);
+ sq_flushed = c4iw_flush_sq(qhp);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&schp->comp_handler_lock, flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+
+ if (schp == rchp) {
+ if (t4_clear_cq_armed(&rchp->cq) &&
+ (rq_flushed || sq_flushed)) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ } else {
+ if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
}
}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa31a8..c04e5134b30 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
return !wq->rq.queue[wq->rq.size].status.db_off;
}
+enum t4_cq_flags {
+ CQ_ARMED = 1,
+};
+
struct t4_cq {
struct t4_cqe *queue;
dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
u16 cidx_inc;
u8 gen;
u8 error;
+ unsigned long flags;
};
+static inline int t4_clear_cq_armed(struct t4_cq *cq)
+{
+ return test_and_clear_bit(CQ_ARMED, &cq->flags);
+}
+
static inline int t4_arm_cq(struct t4_cq *cq, int se)
{
u32 val;
+ set_bit(CQ_ARMED, &cq->flags);
while (cq->cidx_inc > CIDXINC_MASK) {
val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
INGRESSQID(cq->cqid);
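The new CQ_ARMED bit pairs t4_arm_cq(), which sets it whenever the consumer re-arms the CQ, with t4_clear_cq_armed() in the flush and error paths, so the comp_handler upcall only fires when a notification was actually requested. A minimal sketch of the pattern as used in the qp.c hunk above (the helper name is illustrative; the fields come from this diff):

/* Invoke the CQ completion handler only if the CQ was armed since the
 * last notification; test_and_clear_bit() makes check-and-consume atomic. */
static void notify_if_armed(struct c4iw_cq *chp)
{
	unsigned long flag;

	if (t4_clear_cq_armed(&chp->cq)) {
		spin_lock_irqsave(&chp->comp_handler_lock, flag);
		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
	}
}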
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d0424d4..e890e5ba0e0 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
* @dd: the infinipath device
* @pkeys: the PKEY table
*/
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
{
struct ipath_portdata *pd;
int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
}
if (changed) {
u64 pkey;
+ struct ib_event event;
pkey = (u64) dd->ipath_pkeys[0] |
((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
(unsigned long long) pkey);
ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
pkey);
+
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.device = &dd->verbs_dev->ibdev;
+ event.element.port_num = port;
+ ib_dispatch_event(&event);
}
return 0;
}
static int recv_subn_set_pkeytable(struct ib_smp *smp,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u8 port)
{
u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
__be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
for (i = 0; i < n; i++)
q[i] = be16_to_cpu(p[i]);
- if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+ if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
smp->status |= IB_SMP_INVALID_FIELD;
return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
ret = recv_subn_set_portinfo(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_PKEY_TABLE:
- ret = recv_subn_set_pkeytable(smp, ibdev);
+ ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_SM_INFO:
if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad0564ac..82a7dd87089 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0f7027e7db1..e1e558a3d69 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
const struct default_rules *pdefault_rules = default_table;
u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
- for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
- pdefault_rules++) {
+ for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
memset(&field_types, 0, sizeof(field_types));
@@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules(
int size = 0;
int i;
- for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
- sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
int ret;
union ib_flow_spec ib_spec;
switch (pdefault_rules->rules_create_list[i]) {
@@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
+ ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 369da3ca5d6..e8cad3926bf 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index cb2a8727f3f..9b0e80e59b0 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
+ /* Force registering the memory as writable. */
+ /* Used for memory re-registration. HCA protects the access */
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ access_flags | IB_ACCESS_LOCAL_WRITE, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -183,6 +185,90 @@ err_free:
return ERR_PTR(err);
}
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(mr->device);
+ struct mlx4_ib_mr *mmr = to_mmr(mr);
+ struct mlx4_mpt_entry *mpt_entry;
+ struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
+ int err;
+
+ /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
+ * we assume that the calls can't run concurrently. Otherwise, a
+ * race exists.
+ */
+ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
+
+ if (err)
+ return err;
+
+ if (flags & IB_MR_REREG_PD) {
+ err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
+ to_mpd(pd)->pdn);
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
+ convert_access(mr_access_flags));
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ int shift;
+ int err;
+ int n;
+
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ mmr->umem = ib_umem_get(mr->uobject->context, start, length,
+ mr_access_flags |
+ IB_ACCESS_LOCAL_WRITE,
+ 0);
+ if (IS_ERR(mmr->umem)) {
+ err = PTR_ERR(mmr->umem);
+ mmr->umem = NULL;
+ goto release_mpt_entry;
+ }
+ n = ib_umem_page_count(mmr->umem);
+ shift = ilog2(mmr->umem->page_size);
+
+ mmr->mmr.iova = virt_addr;
+ mmr->mmr.size = length;
+ err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
+ virt_addr, length, n, shift,
+ *pmpt_entry);
+ if (err) {
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+
+ err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
+ if (err) {
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+ }
+
+ /* If we couldn't transfer the MR to the HCA, just remember to
+ * return a failure. But dereg_mr will free the resources.
+ */
+ err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
+
+release_mpt_entry:
+ mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
+
+ return err;
+}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7efe6e3f354..8c574b63d77 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
err = -EINVAL;
*bad_wr = wr;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f457fc5..8881fa376e0 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 19011dbb930..b43456ae124 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -40,7 +40,7 @@
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u"
#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -137,6 +137,7 @@ struct mqe_ctx {
u16 cqe_status;
u16 ext_status;
bool cmd_done;
+ bool fw_error_state;
};
struct ocrdma_hw_mr {
@@ -235,7 +236,10 @@ struct ocrdma_dev {
struct list_head entry;
struct rcu_head rcu;
int id;
- u64 stag_arr[OCRDMA_MAX_STAG];
+ u64 *stag_arr;
+ u8 sl; /* service level */
+ bool pfc_state;
+ atomic_t update_sl;
u16 pvid;
u32 asic_id;
@@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
}
+static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio)
+{
+ return *(pfc + prio);
+}
+
+static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio)
+{
+ return *(app_prio + prio);
+}
+
+static inline u8 ocrdma_is_enabled_and_synced(u32 state)
+{ /* May also be used to interpret TC-state, QCN-state
+ * Appl-state and Logical-link-state in future.
+ */
+ return (state & OCRDMA_STATE_FLAG_ENABLED) &&
+ (state & OCRDMA_STATE_FLAG_SYNC);
+}
+
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index d4cc01f10c0..40f8536c10b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,6 +35,8 @@
#include "ocrdma_ah.h"
#include "ocrdma_hw.h"
+#define OCRDMA_VID_PCP_SHIFT 0xD
+
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, int pdid)
{
@@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
if (vlan_tag && (vlan_tag < 0x1000)) {
eth.eth_type = cpu_to_be16(0x8100);
eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
- vlan_tag |= (attr->sl & 7) << 13;
+ vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
eth.vlan_tag = cpu_to_be16(vlan_tag);
eth_sz = sizeof(struct ocrdma_eth_vlan);
vlan_enabled = true;
@@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ if (atomic_cmpxchg(&dev->update_sl, 1, 0))
+ ocrdma_init_service_level(dev);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 3bbf2010a82..dd35ae558ae 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
cmd->eqn = eq->id;
- cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+ cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe);
ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
cq->dma, PAGE_SIZE_4K);
@@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
{
struct ocrdma_qp *qp = NULL;
struct ocrdma_cq *cq = NULL;
- struct ib_event ib_evt = { 0 };
+ struct ib_event ib_evt;
int cq_event = 0;
int qp_event = 1;
int srq_event = 0;
@@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+ memset(&ib_evt, 0, sizeof(ib_evt));
+
ib_evt.device = &dev->ibdev;
switch (type) {
@@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
break;
+
+ case OCRDMA_ASYNC_EVENT_COS_VALUE:
+ atomic_set(&dev->update_sl, 1);
+ break;
default:
/* Not interested evts. */
break;
@@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
msecs_to_jiffies(30000));
if (status)
return 0;
- else
+ else {
+ dev->mqe_ctx.fw_error_state = true;
+ pr_err("%s(%d) mailbox timeout: fw not responding\n",
+ __func__, dev->id);
return -1;
+ }
}
/* issue a mailbox command on the MQ */
@@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
+ if (dev->mqe_ctx.fw_error_state)
+ goto mbx_err;
ocrdma_post_mqe(dev, mqe);
status = ocrdma_wait_mqe_cmpl(dev);
if (status)
@@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
attr->max_mw = rsp->max_mw;
attr->max_mr = rsp->max_mr;
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
+ rsp->max_mr_size_lo;
attr->max_fmr = 0;
attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
@@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
- dev->hba_port_num = hba_attribs->phy_port;
+ dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
+ OCRDMA_HBA_ATTRB_PTNUM_MASK)
+ >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
strncpy(dev->model_number,
hba_attribs->controller_model_number, 31);
}
@@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
- *lnk_speed = rsp->phys_port_speed;
+ *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+ >> OCRDMA_PHY_PS_SHIFT;
mbx_err:
kfree(cmd);
@@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
goto mbx_err;
rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
- dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.phy_type =
+ (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK);
+ dev->phy.interface_type =
+ (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK)
+ >> OCRDMA_IF_TYPE_SHIFT;
dev->phy.auto_speeds_supported =
- le16_to_cpu(rsp->auto_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK);
dev->phy.fixed_speeds_supported =
- le16_to_cpu(rsp->fixed_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK)
+ >> OCRDMA_FSPEED_SUPP_SHIFT;
mbx_err:
kfree(cmd);
return status;
@@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
- pbes[i].pa_lo = (u32) (pa & 0xffffffff);
- pbes[i].pa_hi = (u32) upper_32_bits(pa);
+ pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff);
+ pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa));
pa += PAGE_SIZE;
}
cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
@@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
dev->av_tbl.pa);
+ dev->av_tbl.va = NULL;
dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
dev->av_tbl.pbl.pa);
kfree(cmd);
@@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
OCRDMA_CREATE_CQ_TYPE_SHIFT;
cq->phase_change = false;
- cmd->cmd.cqe_count = (cq->len / cqe_size);
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size);
} else {
- cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1;
cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
cq->phase_change = true;
}
- cmd->cmd.pd_id = pd_id; /* valid only for v3 */
+ /* pd_id valid only for v3 */
+ cmd->cmd.pdid_cqecnt |= (pd_id <<
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT);
ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status)
@@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
qp->rq_cq = cq;
- if (pd->dpp_enabled && pd->num_dpp_qp) {
+ if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
+ (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
}
@@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
return -EINVAL;
+ if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0))
+ ocrdma_init_service_level(qp->dev);
cmd->params.tclass_sq_psn |=
(ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
@@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+ cmd->params.rnt_rc_sl_fl |=
+ (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
}
return 0;
}
@@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
return status;
}
+static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg)
+{
+ int status = 0;
+ dma_addr_t pa;
+ struct ocrdma_mqe cmd;
+
+ struct ocrdma_get_dcbx_cfg_req *req = NULL;
+ struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge;
+
+ memset(&cmd, 0, sizeof(struct ocrdma_mqe));
+ cmd.hdr.pyld_len = max_t(u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp),
+ sizeof(struct ocrdma_get_dcbx_cfg_req));
+ req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL);
+ if (!req) {
+ status = -ENOMEM;
+ goto mem_err;
+ }
+
+ cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL);
+ mqe_sge->pa_hi = (u32) upper_32_bits(pa);
+ mqe_sge->len = cmd.hdr.pyld_len;
+
+ memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
+ ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
+ OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
+ req->param_type = ptype;
+
+ status = ocrdma_mbx_cmd(dev, &cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req;
+ ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp));
+ memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg));
+
+mbx_err:
+ dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa);
+mem_err:
+ return status;
+}
+
+#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08
+#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05
+
+static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg,
+ u8 *srvc_lvl)
+{
+ int status = -EINVAL, indx, slindx;
+ int ventry_cnt;
+ struct ocrdma_app_parameter *app_param;
+ u8 valid, proto_sel;
+ u8 app_prio, pfc_prio;
+ u16 proto;
+
+ if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) {
+ pr_info("%s ocrdma%d DCBX is disabled\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ goto out;
+ }
+
+ if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) {
+ pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ (ptype > 0 ? "operational" : "admin"),
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ?
+ "enabled" : "disabled",
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ?
+ "" : ", not sync'ed");
+ goto out;
+ } else {
+ pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ }
+
+ ventry_cnt = (dcbxcfg->tcv_aev_opv_st >>
+ OCRDMA_DCBX_APP_ENTRY_SHIFT)
+ & OCRDMA_DCBX_STATE_MASK;
+
+ for (indx = 0; indx < ventry_cnt; indx++) {
+ app_param = &dcbxcfg->app_param[indx];
+ valid = (app_param->valid_proto_app >>
+ OCRDMA_APP_PARAM_VALID_SHIFT)
+ & OCRDMA_APP_PARAM_VALID_MASK;
+ proto_sel = (app_param->valid_proto_app
+ >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT)
+ & OCRDMA_APP_PARAM_PROTO_SEL_MASK;
+ proto = app_param->valid_proto_app &
+ OCRDMA_APP_PARAM_APP_PROTO_MASK;
+
+ if (valid && proto == OCRDMA_APP_PROTO_ROCE &&
+     proto_sel == OCRDMA_PROTO_SELECT_L2) {
+ for (slindx = 0; slindx <
+ OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
+ app_prio = ocrdma_get_app_prio(
+ (u8 *)app_param->app_prio,
+ slindx);
+ pfc_prio = ocrdma_get_pfc_prio(
+ (u8 *)dcbxcfg->pfc_prio,
+ slindx);
+
+ if (app_prio && pfc_prio) {
+ *srvc_lvl = slindx;
+ status = 0;
+ goto out;
+ }
+ }
+ if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) {
+ pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n",
+ dev_name(&dev->nic_info.pdev->dev),
+ dev->id, proto);
+ }
+ }
+ }
+
+out:
+ return status;
+}
+
+void ocrdma_init_service_level(struct ocrdma_dev *dev)
+{
+ int status = 0, indx;
+ struct ocrdma_dcbx_cfg dcbxcfg;
+ u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL;
+ int ptype = OCRDMA_PARAMETER_TYPE_OPER;
+
+ for (indx = 0; indx < 2; indx++) {
+ status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg);
+ if (status) {
+ pr_err("%s(): status=%d\n", __func__, status);
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ status = ocrdma_parse_dcbxcfg_rsp(dev, ptype,
+ &dcbxcfg, &srvc_lvl);
+ if (status) {
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ break;
+ }
+
+ if (status)
+ pr_info("%s ocrdma%d service level default\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ else
+ pr_info("%s ocrdma%d service level %d\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ srvc_lvl);
+
+ dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state);
+ dev->sl = srvc_lvl;
+}
+
int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
{
int i;
@@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
goto conf_err;
status = ocrdma_mbx_get_phy_info(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
status = ocrdma_mbx_get_ctrl_attribs(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
return 0;
+info_attrb_err:
+ ocrdma_mbx_delete_ah_tbl(dev);
conf_err:
ocrdma_destroy_mq(dev);
mq_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index e513f729314..6eed8f19132 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
char *port_speed_string(struct ocrdma_dev *dev);
+void ocrdma_init_service_level(struct ocrdma_dev *);
+
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7c504e07974..256a06bc0b6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
if (!dev->qp_tbl)
goto alloc_err;
}
+
+ dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
+ if (dev->stag_arr == NULL)
+ goto alloc_err;
+
spin_lock_init(&dev->av_tbl.lock);
spin_lock_init(&dev->flush_q_lock);
return 0;
@@ -334,6 +339,7 @@ alloc_err:
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
+ kfree(dev->stag_arr);
kfree(dev->qp_tbl);
kfree(dev->cq_tbl);
kfree(dev->sgid_tbl);
@@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
{
struct ocrdma_dev *dev = dev_get_drvdata(device);
- return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
+}
+
+static ssize_t show_hca_type(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type
};
static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
@@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
}
+static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+ struct in_device *in_dev;
+ union ib_gid gid;
+ in_dev = in_dev_get(net);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ ocrdma_add_sgid(dev, &gid);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+}
+
+static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+ in6_dev = in6_dev_get(net);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ ocrdma_add_sgid(dev, pgid);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
+{
+ struct net_device *net_dev;
+
+ for_each_netdev(&init_net, net_dev) {
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ?
+ rdma_vlan_dev_real_dev(net_dev) : net_dev;
+
+ if (real_dev == dev->nic_info.netdev) {
+ ocrdma_init_ipv4_gids(dev, net_dev);
+ ocrdma_init_ipv6_gids(dev, net_dev);
+ }
+ }
+}
+
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
int status = 0, i;
@@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
+ ocrdma_init_service_level(dev);
+ ocrdma_init_gid_table(dev);
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
@@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev)
return 0;
}
+static void ocrdma_shutdown(struct ocrdma_dev *dev)
+{
+ ocrdma_close(dev);
+ ocrdma_remove(dev);
+}
+
/* event handling via NIC driver ensures that all the NIC specific
* initialization done before RoCE driver notifies
* event to stack.
@@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
case BE_DEV_DOWN:
ocrdma_close(dev);
break;
+ case BE_DEV_SHUTDOWN:
+ ocrdma_shutdown(dev);
+ break;
}
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 96c9ee602ba..904989ec5ea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -44,35 +44,39 @@ enum {
#define OCRDMA_SUBSYS_ROCE 10
enum {
OCRDMA_CMD_QUERY_CONFIG = 1,
- OCRDMA_CMD_ALLOC_PD,
- OCRDMA_CMD_DEALLOC_PD,
-
- OCRDMA_CMD_CREATE_AH_TBL,
- OCRDMA_CMD_DELETE_AH_TBL,
-
- OCRDMA_CMD_CREATE_QP,
- OCRDMA_CMD_QUERY_QP,
- OCRDMA_CMD_MODIFY_QP,
- OCRDMA_CMD_DELETE_QP,
-
- OCRDMA_CMD_RSVD1,
- OCRDMA_CMD_ALLOC_LKEY,
- OCRDMA_CMD_DEALLOC_LKEY,
- OCRDMA_CMD_REGISTER_NSMR,
- OCRDMA_CMD_REREGISTER_NSMR,
- OCRDMA_CMD_REGISTER_NSMR_CONT,
- OCRDMA_CMD_QUERY_NSMR,
- OCRDMA_CMD_ALLOC_MW,
- OCRDMA_CMD_QUERY_MW,
-
- OCRDMA_CMD_CREATE_SRQ,
- OCRDMA_CMD_QUERY_SRQ,
- OCRDMA_CMD_MODIFY_SRQ,
- OCRDMA_CMD_DELETE_SRQ,
-
- OCRDMA_CMD_ATTACH_MCAST,
- OCRDMA_CMD_DETACH_MCAST,
- OCRDMA_CMD_GET_RDMA_STATS,
+ OCRDMA_CMD_ALLOC_PD = 2,
+ OCRDMA_CMD_DEALLOC_PD = 3,
+
+ OCRDMA_CMD_CREATE_AH_TBL = 4,
+ OCRDMA_CMD_DELETE_AH_TBL = 5,
+
+ OCRDMA_CMD_CREATE_QP = 6,
+ OCRDMA_CMD_QUERY_QP = 7,
+ OCRDMA_CMD_MODIFY_QP = 8,
+ OCRDMA_CMD_DELETE_QP = 9,
+
+ OCRDMA_CMD_RSVD1 = 10,
+ OCRDMA_CMD_ALLOC_LKEY = 11,
+ OCRDMA_CMD_DEALLOC_LKEY = 12,
+ OCRDMA_CMD_REGISTER_NSMR = 13,
+ OCRDMA_CMD_REREGISTER_NSMR = 14,
+ OCRDMA_CMD_REGISTER_NSMR_CONT = 15,
+ OCRDMA_CMD_QUERY_NSMR = 16,
+ OCRDMA_CMD_ALLOC_MW = 17,
+ OCRDMA_CMD_QUERY_MW = 18,
+
+ OCRDMA_CMD_CREATE_SRQ = 19,
+ OCRDMA_CMD_QUERY_SRQ = 20,
+ OCRDMA_CMD_MODIFY_SRQ = 21,
+ OCRDMA_CMD_DELETE_SRQ = 22,
+
+ OCRDMA_CMD_ATTACH_MCAST = 23,
+ OCRDMA_CMD_DETACH_MCAST = 24,
+
+ OCRDMA_CMD_CREATE_RBQ = 25,
+ OCRDMA_CMD_DESTROY_RBQ = 26,
+
+ OCRDMA_CMD_GET_RDMA_STATS = 27,
OCRDMA_CMD_MAX
};
@@ -103,7 +107,7 @@ enum {
#define OCRDMA_MAX_QP 2048
#define OCRDMA_MAX_CQ 2048
-#define OCRDMA_MAX_STAG 8192
+#define OCRDMA_MAX_STAG 16384
enum {
OCRDMA_DB_RQ_OFFSET = 0xE0,
@@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe {
#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
-#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
+
+enum ocrdma_async_grp5_events {
+ OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
+ OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02,
+ OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03
+};
enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_CQ_ERROR = 0x00,
@@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config {
u32 max_ird_ord_per_qp;
u32 max_shared_ird_ord;
u32 max_mr;
- u32 max_mr_size_lo;
u32 max_mr_size_hi;
+ u32 max_mr_size_lo;
u32 max_num_mr_pbl;
u32 max_mw;
u32 max_fmr;
@@ -580,17 +589,26 @@ enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+enum {
+ OCRDMA_IF_TYPE_MASK = 0xFFFF0000,
+ OCRDMA_IF_TYPE_SHIFT = 0x10,
+ OCRDMA_PHY_TYPE_MASK = 0x0000FFFF,
+ OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000,
+ OCRDMA_FUTURE_DETAILS_SHIFT = 0x10,
+ OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF,
+ OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000,
+ OCRDMA_FSPEED_SUPP_SHIFT = 0x10,
+ OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF
+};
+
struct ocrdma_get_phy_info_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u16 phy_type;
- u16 interface_type;
+ u32 ityp_ptyp;
u32 misc_params;
- u16 ext_phy_details;
- u16 rsvd;
- u16 auto_speeds_supported;
- u16 fixed_speeds_supported;
+ u32 ftrdtl_exphydtl;
+ u32 fspeed_aspeed;
u32 future_use[2];
};
@@ -603,19 +621,34 @@ enum {
OCRDMA_PHY_SPEED_40GBPS = 0x20
};
+enum {
+ OCRDMA_PORT_NUM_MASK = 0x3F,
+ OCRDMA_PT_MASK = 0xC0,
+ OCRDMA_PT_SHIFT = 0x6,
+ OCRDMA_LINK_DUP_MASK = 0x0000FF00,
+ OCRDMA_LINK_DUP_SHIFT = 0x8,
+ OCRDMA_PHY_PS_MASK = 0x00FF0000,
+ OCRDMA_PHY_PS_SHIFT = 0x10,
+ OCRDMA_PHY_PFLT_MASK = 0xFF000000,
+ OCRDMA_PHY_PFLT_SHIFT = 0x18,
+ OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
+ OCRDMA_QOS_LNKSP_SHIFT = 0x10,
+ OCRDMA_LLST_MASK = 0xFF,
+ OCRDMA_PLFC_MASK = 0x00000400,
+ OCRDMA_PLFC_SHIFT = 0x8,
+ OCRDMA_PLRFC_MASK = 0x00000200,
+ OCRDMA_PLRFC_SHIFT = 0x8,
+ OCRDMA_PLTFC_MASK = 0x00000100,
+ OCRDMA_PLTFC_SHIFT = 0x8
+};
struct ocrdma_get_link_speed_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u8 pt_port_num;
- u8 link_duplex;
- u8 phys_port_speed;
- u8 phys_port_fault;
- u16 rsvd1;
- u16 qos_lnk_speed;
- u8 logical_lnk_status;
- u8 rsvd2[3];
+ u32 pflt_pps_ld_pnum;
+ u32 qos_lsp;
+ u32 res_lls;
};
enum {
@@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd {
u32 pgsz_pgcnt;
u32 ev_cnt_flags;
u32 eqn;
- u16 cqe_count;
- u16 pd_id;
+ u32 pdid_cqecnt;
u32 rsvd6;
struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
};
@@ -678,6 +710,10 @@ struct ocrdma_create_cq {
};
enum {
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10
+};
+
+enum {
OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
};
@@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq {
enum {
OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
- OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
OCRDMA_DPP_PAGE_SIZE = 4096
};
@@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp {
struct ocrdma_rx_dbg_stats rx_dbg_stats;
} __packed;
+enum {
+ OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_CV_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000,
+ OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000,
+ OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E,
+ OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_NETFIL_MASK = 0xFF
+};
struct mgmt_hba_attribs {
u8 flashrom_version_string[32];
u8 manufacturer_name[32];
u32 supported_modes;
- u32 rsvd0[3];
+ u32 rsvd_eprom_verhi_verlo;
+ u32 mbx_ds_ver;
+ u32 epfw_ds_ver;
u8 ncsi_ver_string[12];
u32 default_extended_timeout;
u8 controller_model_number[32];
@@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs {
u8 driver_version_string[32];
u8 fw_on_flash_version_string[32];
u32 functionalities_supported;
- u16 max_cdblength;
- u8 asic_revision;
- u8 generational_guid[16];
- u8 hba_port_count;
- u16 default_link_down_timeout;
- u8 iscsi_ver_min_max;
- u8 multifunction_device;
- u8 cache_valid;
- u8 hba_status;
- u8 max_domains_supported;
- u8 phy_port;
+ u32 guid0_asicrev_cdblen;
+ u8 generational_guid[12];
+ u32 portcnt_guid15;
+ u32 mfuncdev_iscsi_ldtout;
+ u32 ptpnum_maxdoms_hbast_cv;
u32 firmware_post_status;
u32 hba_mtu[8];
- u32 rsvd1[4];
+ u32 res_asicgen_iscsi_feaures;
+ u32 rsvd1[3];
};
struct mgmt_controller_attrib {
struct mgmt_hba_attribs hba_attribs;
- u16 pci_vendor_id;
- u16 pci_device_id;
- u16 pci_sub_vendor_id;
- u16 pci_sub_system_id;
- u8 pci_bus_number;
- u8 pci_device_number;
- u8 pci_function_number;
- u8 interface_type;
- u64 unique_identifier;
- u32 rsvd0[5];
+ u32 pci_did_vid;
+ u32 pci_ssid_svid;
+ u32 ityp_fnum_devnum_bnum;
+ u32 uid_hi;
+ u32 uid_lo;
+ u32 res_nnetfil;
+ u32 rsvd0[4];
};
struct ocrdma_get_ctrl_attribs_rsp {
@@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp {
struct mgmt_controller_attrib ctrl_attribs;
};
+#define OCRDMA_SUBSYS_DCBX 0x10
+
+enum OCRDMA_DCBX_OPCODE {
+ OCRDMA_CMD_GET_DCBX_CONFIG = 0x01
+};
+
+enum OCRDMA_DCBX_PARAM_TYPE {
+ OCRDMA_PARAMETER_TYPE_ADMIN = 0x00,
+ OCRDMA_PARAMETER_TYPE_OPER = 0x01,
+ OCRDMA_PARAMETER_TYPE_PEER = 0x02
+};
+
+enum OCRDMA_DCBX_APP_PROTO {
+ OCRDMA_APP_PROTO_ROCE = 0x8915
+};
+
+enum OCRDMA_DCBX_PROTO {
+ OCRDMA_PROTO_SELECT_L2 = 0x00,
+ OCRDMA_PROTO_SELECT_L4 = 0x01
+};
+
+enum OCRDMA_DCBX_APP_PARAM {
+ OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10,
+ OCRDMA_APP_PARAM_VALID_MASK = 0xFF,
+ OCRDMA_APP_PARAM_VALID_SHIFT = 0x18
+};
+
+enum OCRDMA_DCBX_STATE_FLAGS {
+ OCRDMA_STATE_FLAG_ENABLED = 0x01,
+ OCRDMA_STATE_FLAG_ADDVERTISED = 0x02,
+ OCRDMA_STATE_FLAG_WILLING = 0x04,
+ OCRDMA_STATE_FLAG_SYNC = 0x08,
+ OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000,
+ OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000
+};
+
+enum OCRDMA_TCV_AEV_OPV_ST {
+ OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF,
+ OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18,
+ OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10,
+ OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08,
+ OCRDMA_DCBX_STATE_MASK = 0xFF
+};
+
+struct ocrdma_app_parameter {
+ u32 valid_proto_app;
+ u32 oui;
+ u32 app_prio[2];
+};
+
+struct ocrdma_dcbx_cfg {
+ u32 tcv_aev_opv_st;
+ u32 tc_state;
+ u32 pfc_state;
+ u32 qcn_state;
+ u32 appl_state;
+ u32 ll_state;
+ u32 tc_bw[2];
+ u32 tc_prio[8];
+ u32 pfc_prio[2];
+ struct ocrdma_app_parameter app_param[15];
+};
+
+struct ocrdma_get_dcbx_cfg_req {
+ struct ocrdma_mbx_hdr hdr;
+ u32 param_type;
+} __packed;
+
+struct ocrdma_get_dcbx_cfg_rsp {
+ struct ocrdma_mbx_rsp hdr;
+ struct ocrdma_dcbx_cfg cfg;
+} __packed;
#endif /* __OCRDMA_SLI_H__ */
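
Throughout ocrdma_sli.h the response structures drop their u8/u16 members in favor of whole u32 words (ityp_ptyp, pflt_pps_ld_pnum, pci_did_vid and so on) that are decoded with the companion *_MASK/*_SHIFT enums added alongside them, which keeps each 32-bit word's layout and endian handling in one place. A minimal sketch of the decode pattern, assuming a consumer-side helper of this shape (the function name is illustrative):

        /* Illustrative decode of the packed ityp_ptyp word from
         * struct ocrdma_get_phy_info_rsp using the masks defined above. */
        static inline u16 example_phy_if_type(u32 ityp_ptyp)
        {
                return (ityp_ptyp & OCRDMA_IF_TYPE_MASK) >> OCRDMA_IF_TYPE_SHIFT;
        }
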
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index edf6211d84b..acb434d1690 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = dev->attr.max_mr_size;
attr->page_size_cap = 0xffff000;
attr->vendor_id = dev->nic_info.pdev->vendor;
attr->vendor_part_id = dev->nic_info.pdev->device;
- attr->hw_ver = 0;
+ attr->hw_ver = dev->asic_id;
attr->max_qp = dev->attr.max_qp;
attr->max_ah = OCRDMA_MAX_AH;
attr->max_qp_wr = dev->attr.max_wqe;
@@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
pd->dpp_enabled =
ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
pd->num_dpp_qp =
- pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+ pd->dpp_enabled ? (dev->nic_info.db_page_size /
+ dev->attr.wqe_size) : 0;
}
retry:
@@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
struct ocrdma_pd *pd = uctx->cntxt_pd;
struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
- BUG_ON(uctx->pd_in_use);
+ if (uctx->pd_in_use) {
+ pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+ __func__, dev->id, pd->id);
+ }
uctx->cntxt_pd = NULL;
status = _ocrdma_dealloc_pd(dev, pd);
return status;
@@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
+
+ /* Don't stop cleanup, in case FW is unresponsive */
+ if (dev->mqe_ctx.fw_error_state) {
+ status = 0;
+ pr_err("%s(%d) fw not responding.\n",
+ __func__, dev->id);
+ }
return status;
}
@@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
while (wr) {
+ if (qp->qp_type == IB_QPT_UD &&
+ (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)) {
+ *bad_wr = wr;
+ status = -EINVAL;
+ break;
+ }
if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
wr->num_sge > qp->sq.max_sges) {
*bad_wr = wr;
@@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
*stop = true;
expand = false;
}
+ } else if (is_hw_sq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
@@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
*stop = true;
expand = false;
}
+ } else if (is_hw_rq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
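
In ocrdma_verbs.c the reported max_mr_size and hw_ver now come from the device (dev->attr.max_mr_size and dev->asic_id) instead of ~0 and 0, UD QPs reject anything other than SEND/SEND_WITH_IMM in the post-send path, and the per-PD direct-push (DPP) QP budget is derived from hardware geometry in place of the removed OCRDMA_PD_MAX_DPP_ENABLED_QP constant. As a rough worked example with illustrative numbers, not values taken from the patch: a 4096-byte doorbell page and 256-byte WQEs give db_page_size / wqe_size = 16 DPP QPs per protection domain.
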
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e5740..636be117b57 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
ibp = &dd->pport[p].ibport_data;
agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c639f90cfda..3edce617c31 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,7 +86,6 @@ enum {
IPOIB_FLAG_INITIALIZED = 1,
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
- IPOIB_PKEY_STOP = 4,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
@@ -312,7 +311,6 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
@@ -532,8 +531,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf);
void ipoib_setup(struct net_device *dev);
-void ipoib_pkey_poll(struct work_struct *work);
-int ipoib_pkey_dev_delay_open(struct net_device *dev);
+void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616..6bd5740e269 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (priv->mcg_dentry)
- debugfs_remove(priv->mcg_dentry);
- if (priv->path_dentry)
- debugfs_remove(priv->path_dentry);
+ debugfs_remove(priv->mcg_dentry);
+ debugfs_remove(priv->path_dentry);
}
int ipoib_register_debugfs(void)
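
debugfs_remove() already ignores a NULL dentry, so the explicit checks dropped from ipoib_delete_debug_files() above were redundant.
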
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6a7003ddb0b..72626c34817 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
- ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
return -1;
}
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev);
if (ret) {
@@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev, flush);
return -1;
}
-static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- u16 pkey_index = 0;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+ if (!(priv->pkey & 0x7fff) ||
+ ib_find_pkey(priv->ca, priv->port, priv->pkey,
+ &priv->pkey_index))
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
else
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- /* Shutdown the P_Key thread if still active */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work_sync(&priv->pkey_poll_task);
- mutex_unlock(&pkey_mutex);
- }
-
ipoib_mcast_stop_thread(dev, flush);
ipoib_mcast_dev_flush(dev);
@@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
+ if (ipoib_ib_dev_open(dev, 1)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
return 1;
}
+/*
+ * returns 0 if pkey value was found in a different slot.
+ */
+static inline int update_child_pkey(struct ipoib_dev_priv *priv)
+{
+ u16 old_index = priv->pkey_index;
+
+ priv->pkey_index = 0;
+ ipoib_pkey_dev_check_presence(priv->dev);
+
+ if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+ (old_index == priv->pkey_index))
+ return 1;
+ return 0;
+}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level)
{
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
- u16 new_index;
int result;
down_read(&priv->vlan_rwsem);
@@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
up_read(&priv->vlan_rwsem);
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
- /* for non-child devices must check/update the pkey value here */
- if (level == IPOIB_FLUSH_HEAVY &&
- !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- update_parent_pkey(priv);
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
+ level != IPOIB_FLUSH_HEAVY) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
+ /* interface is down. update pkey and leave. */
+ if (level == IPOIB_FLUSH_HEAVY) {
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+ update_parent_pkey(priv);
+ else
+ update_child_pkey(priv);
+ }
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return;
}
@@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
* (parent) devices should always takes what present in pkey index 0
*/
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- ipoib_ib_dev_down(dev, 0);
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_pkey_dev_delay_open(dev))
- return;
- }
- /* restart QP only if P_Key index is changed */
- if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
- new_index == priv->pkey_index) {
+ result = update_child_pkey(priv);
+ if (result) {
+ /* restart QP only if P_Key index is changed */
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
- priv->pkey_index = new_index;
+
} else {
result = update_parent_pkey(priv);
/* restart QP only if P_Key value changed */
@@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev, 0);
if (level == IPOIB_FLUSH_HEAVY) {
- ipoib_ib_dev_stop(dev, 0);
- ipoib_ib_dev_open(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ ipoib_ib_dev_stop(dev, 0);
+ if (ipoib_ib_dev_open(dev, 0) != 0)
+ return;
+ if (netif_queue_stopped(dev))
+ netif_start_queue(dev);
}
/*
@@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
ipoib_transport_dev_cleanup(dev);
}
-/*
- * Delayed P_Key Assigment Interim Support
- *
- * The following is initial implementation of delayed P_Key assigment
- * mechanism. It is using the same approach implemented for the multicast
- * group join. The single goal of this implementation is to quickly address
- * Bug #2507. This implementation will probably be removed when the P_Key
- * change async notification is available.
- */
-
-void ipoib_pkey_poll(struct work_struct *work)
-{
- struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
- struct net_device *dev = priv->dev;
-
- ipoib_pkey_dev_check_presence(dev);
-
- if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
- ipoib_open(dev);
- else {
- mutex_lock(&pkey_mutex);
- if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- }
-}
-
-int ipoib_pkey_dev_delay_open(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- /* Look for the interface pkey value in the IB Port P_Key table and */
- /* set the interface pkey assigment flag */
- ipoib_pkey_dev_check_presence(dev);
- /* P_Key value not assigned yet - start polling */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- clear_bit(IPOIB_PKEY_STOP, &priv->flags);
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- return 1;
- }
-
- return 0;
-}
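
With the delayed poller gone, P_Key validity is decided inline: ipoib_pkey_dev_check_presence() treats a key whose low 15 bits are all zero as invalid (the top bit is only the full/limited membership flag), otherwise it records the found index straight into priv->pkey_index, and the heavy-flush path now performs the reopen that the delayed work used to do. A minimal sketch of the validity test, with an illustrative helper name:

        /* Illustrative only: bit 15 is the membership bit, so a P_Key is
         * meaningful only if some of the low 15 bits are set. */
        static inline bool example_pkey_is_valid(u16 pkey)
        {
                return (pkey & 0x7fff) != 0;
        }
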
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4e675f4fecc..1310acf6bf9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_pkey_dev_delay_open(dev))
- return 0;
-
- if (ipoib_ib_dev_open(dev))
+ if (ipoib_ib_dev_open(dev, 1)) {
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+ return 0;
goto err_disable;
+ }
if (ipoib_ib_dev_up(dev))
goto err_stop;
@@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index eb7973957a6..61ee91d8838 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
- ep = iscsi_create_endpoint(sizeof(*ib_conn));
+ ep = iscsi_create_endpoint(0);
if (!ep)
return ERR_PTR(-ENOMEM);
- ib_conn = ep->dd_data;
+ ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
+ if (!ib_conn) {
+ err = -ENOMEM;
+ goto failure;
+ }
+
+ ep->dd_data = ib_conn;
ib_conn->ep = ep;
iser_conn_init(ib_conn);
- err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
- non_blocking);
+ err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
if (err)
- return ERR_PTR(err);
+ goto failure;
return ep;
+failure:
+ iscsi_destroy_endpoint(ep);
+ return ERR_PTR(err);
}
static int
@@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
int rc;
ib_conn = ep->dd_data;
- rc = wait_event_interruptible_timeout(ib_conn->wait,
- ib_conn->state == ISER_CONN_UP,
- msecs_to_jiffies(timeout_ms));
-
+ rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
+ msecs_to_jiffies(timeout_ms));
/* if conn establishment failed, return error code to iscsi */
- if (!rc &&
- (ib_conn->state == ISER_CONN_TERMINATING ||
- ib_conn->state == ISER_CONN_DOWN))
- rc = -1;
+ if (rc == 0) {
+ mutex_lock(&ib_conn->state_mutex);
+ if (ib_conn->state == ISER_CONN_TERMINATING ||
+ ib_conn->state == ISER_CONN_DOWN)
+ rc = -1;
+ mutex_unlock(&ib_conn->state_mutex);
+ }
iser_info("ib conn %p rc = %d\n", ib_conn, rc);
@@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
ib_conn = ep->dd_data;
iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
+ mutex_lock(&ib_conn->state_mutex);
iser_conn_terminate(ib_conn);
/*
- * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
- * call and ISER_CONN_DOWN state before freeing the iser resources.
- * otherwise we are safe to free resources immediately.
+ * if iser_conn and iscsi_conn are bound, we must wait for
+ * iscsi_conn_stop and flush errors completion before freeing
+ * the iser resources. Otherwise we are safe to free resources
+ * immediately.
*/
if (ib_conn->iscsi_conn) {
INIT_WORK(&ib_conn->release_work, iser_release_work);
queue_work(release_wq, &ib_conn->release_work);
+ mutex_unlock(&ib_conn->state_mutex);
} else {
+ ib_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
}
+ iscsi_destroy_endpoint(ep);
}
static umode_t iser_attr_is_visible(int param_type, int param)
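
The iser connection object is no longer carved out of the iscsi endpoint: iscsi_create_endpoint(0) allocates just the endpoint, the struct iser_conn is kzalloc'ed separately and hung off ep->dd_data, and ep_disconnect can therefore call iscsi_destroy_endpoint() immediately while the connection is torn down under state_mutex and later kfree'd in iser_conn_release(). The poll path waits on up_completion; a zero return from the timed wait is re-checked against the state under the mutex, roughly as in this condensed sketch of the hunk above:

        /* Sketch of the ep_poll logic (error handling trimmed). */
        rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
                                                       msecs_to_jiffies(timeout_ms));
        if (rc == 0) {                          /* timed out: inspect the state */
                mutex_lock(&ib_conn->state_mutex);
                if (ib_conn->state == ISER_CONN_TERMINATING ||
                    ib_conn->state == ISER_CONN_DOWN)
                        rc = -1;                /* report failure to the iscsi layer */
                mutex_unlock(&ib_conn->state_mutex);
        }
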
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 97cd385bf7f..c877dad381c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -326,7 +326,6 @@ struct iser_conn {
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- wait_queue_head_t wait; /* waitq for conn/disconn */
unsigned qp_max_recv_dtos; /* num of rx buffers */
unsigned qp_max_recv_dtos_mask; /* above minus 1 */
unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
@@ -335,6 +334,9 @@ struct iser_conn {
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
struct completion stop_completion;
+ struct mutex state_mutex;
+ struct completion flush_completion;
+ struct completion up_completion;
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -448,8 +450,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
int non_blocking);
int iser_reg_page_vec(struct iser_conn *ib_conn,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ea01075f9f9..3ef167f97d6 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -491,10 +491,9 @@ out_err:
}
/**
- * releases the QP objects, returns 0 on success,
- * -1 on failure
+ * releases the QP object
*/
-static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
+static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
{
int cq_index;
BUG_ON(ib_conn == NULL);
@@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
}
ib_conn->qp = NULL;
-
- return 0;
}
/**
@@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device)
mutex_unlock(&ig.device_list_mutex);
}
+/**
+ * Called with state mutex held
+ **/
static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
enum iser_ib_conn_state comp,
enum iser_ib_conn_state exch)
{
int ret;
- spin_lock_bh(&ib_conn->lock);
if ((ret = (ib_conn->state == comp)))
ib_conn->state = exch;
- spin_unlock_bh(&ib_conn->lock);
return ret;
}
void iser_release_work(struct work_struct *work)
{
struct iser_conn *ib_conn;
+ int rc;
ib_conn = container_of(work, struct iser_conn, release_work);
/* wait for .conn_stop callback */
- wait_for_completion(&ib_conn->stop_completion);
+ rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
+ WARN_ON(rc == 0);
/* wait for the qp`s post send and post receive buffers to empty */
- wait_event_interruptible(ib_conn->wait,
- ib_conn->state == ISER_CONN_DOWN);
+ rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
+ WARN_ON(rc == 0);
+
+ ib_conn->state = ISER_CONN_DOWN;
+
+ mutex_lock(&ib_conn->state_mutex);
+ ib_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
}
@@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn)
{
struct iser_device *device = ib_conn->device;
- BUG_ON(ib_conn->state == ISER_CONN_UP);
-
mutex_lock(&ig.connlist_mutex);
list_del(&ib_conn->conn_list);
mutex_unlock(&ig.connlist_mutex);
+
+ mutex_lock(&ib_conn->state_mutex);
+ BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+
iser_free_rx_descriptors(ib_conn);
iser_free_ib_conn_res(ib_conn);
ib_conn->device = NULL;
/* on EVENT_ADDR_ERROR there's no device yet for this conn */
if (device != NULL)
iser_device_try_release(device);
+ mutex_unlock(&ib_conn->state_mutex);
+
/* if cma handler context, the caller actually destroy the id */
if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
- iscsi_destroy_endpoint(ib_conn->ep);
+ kfree(ib_conn);
}
/**
@@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
ib_conn,err);
}
+/**
+ * Called with state mutex held
+ **/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
ib_conn = (struct iser_conn *)cma_id->context;
-
ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
}
+/**
+ * Called with state mutex held
+ **/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
struct iser_conn *ib_conn;
int ret;
+ ib_conn = (struct iser_conn *)cma_id->context;
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
@@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
return;
}
- ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->device = device;
/* connection T10-PI support */
@@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
}
}
+/**
+ * Called with state mutex held
+ **/
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
int ret;
struct iser_cm_hdr req_hdr;
+ struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
+ struct iser_device *device = ib_conn->device;
+
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
if (ret)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 4;
+ conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
@@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
+ ib_conn = (struct iser_conn *)cma_id->context;
+ if (ib_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
- ib_conn = (struct iser_conn *)cma_id->context;
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
- wake_up_interruptible(&ib_conn->wait);
+ ib_conn->state = ISER_CONN_UP;
+ complete(&ib_conn->up_completion);
}
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
@@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
iser_err("iscsi_iser connection isn't bound\n");
}
- /* Complete the termination process if no posts are pending */
+ /* Complete the termination process if no posts are pending. This code
+ * block also exists in iser_handle_comp_error(), but it is needed here
+ * for cases of no flushes at all, e.g. discovery over rdma.
+ */
if (ib_conn->post_recv_buf_count == 0 &&
(atomic_read(&ib_conn->post_send_buf_count) == 0)) {
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ complete(&ib_conn->flush_completion);
}
}
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
+ struct iser_conn *ib_conn;
+
+ ib_conn = (struct iser_conn *)cma_id->context;
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
+ mutex_lock(&ib_conn->state_mutex);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
iser_addr_handler(cma_id);
@@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
iser_disconnected_handler(cma_id);
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
+ mutex_unlock(&ib_conn->state_mutex);
return 0;
}
void iser_conn_init(struct iser_conn *ib_conn)
{
ib_conn->state = ISER_CONN_INIT;
- init_waitqueue_head(&ib_conn->wait);
ib_conn->post_recv_buf_count = 0;
atomic_set(&ib_conn->post_send_buf_count, 0);
init_completion(&ib_conn->stop_completion);
+ init_completion(&ib_conn->flush_completion);
+ init_completion(&ib_conn->up_completion);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
+ mutex_init(&ib_conn->state_mutex);
}
/**
@@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn)
* sleeps until the connection is established or rejected
*/
int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
int non_blocking)
{
- struct sockaddr *src, *dst;
int err = 0;
- sprintf(ib_conn->name, "%pI4:%d",
- &dst_addr->sin_addr.s_addr, dst_addr->sin_port);
+ mutex_lock(&ib_conn->state_mutex);
+
+ sprintf(ib_conn->name, "%pISp", dst_addr);
+
+ iser_info("connecting to: %s\n", ib_conn->name);
/* the device is known only --after-- address resolution */
ib_conn->device = NULL;
- iser_info("connecting to: %pI4, port 0x%x\n",
- &dst_addr->sin_addr, dst_addr->sin_port);
-
ib_conn->state = ISER_CONN_PENDING;
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
@@ -837,23 +877,21 @@ int iser_connect(struct iser_conn *ib_conn,
goto id_failure;
}
- src = (struct sockaddr *)src_addr;
- dst = (struct sockaddr *)dst_addr;
- err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
+ err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
if (err) {
iser_err("rdma_resolve_addr failed: %d\n", err);
goto addr_failure;
}
if (!non_blocking) {
- wait_event_interruptible(ib_conn->wait,
- (ib_conn->state != ISER_CONN_PENDING));
+ wait_for_completion_interruptible(&ib_conn->up_completion);
if (ib_conn->state != ISER_CONN_UP) {
err = -EIO;
goto connect_failure;
}
}
+ mutex_unlock(&ib_conn->state_mutex);
mutex_lock(&ig.connlist_mutex);
list_add(&ib_conn->conn_list, &ig.connlist);
@@ -865,6 +903,7 @@ id_failure:
addr_failure:
ib_conn->state = ISER_CONN_DOWN;
connect_failure:
+ mutex_unlock(&ib_conn->state_mutex);
iser_conn_release(ib_conn);
return err;
}
@@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
if (ib_conn->post_recv_buf_count == 0 &&
atomic_read(&ib_conn->post_send_buf_count) == 0) {
- /* getting here when the state is UP means that the conn is *
- * being terminated asynchronously from the iSCSI layer's *
- * perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
+ /**
+ * getting here when the state is UP means that the conn is
+ * being terminated asynchronously from the iSCSI layer's
+ * perspective. It is safe to peek at the connection state
+ * since iscsi_conn_failure is allowed to be called twice.
+ **/
+ if (ib_conn->state == ISER_CONN_UP)
iscsi_conn_failure(ib_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
/* no more non completed posts to the QP, complete the
* termination process w.o worrying on disconnect event */
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ complete(&ib_conn->flush_completion);
}
}
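
The connection state machine in iser_verbs.c drops the wait queue and the locked compare-exchange: state transitions now happen under ib_conn->state_mutex, the CM callbacks bail out early unless the connection is still ISER_CONN_PENDING, and the completions (stop, flush, up) each signal one well-defined event. The release path shows the resulting pattern of bounded waits; a condensed sketch with the 30-second timeouts from iser_release_work above:

        /* Sketch: wait for .conn_stop, then for flush errors to drain,
         * each bounded, before releasing the connection. */
        rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
        WARN_ON(rc == 0);
        rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
        WARN_ON(rc == 0);
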
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b4297..62d2a18e1b4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
+static struct workqueue_struct *srp_remove_wq;
static struct ib_client srp_client = {
.name = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
spin_unlock_irq(&target->lock);
if (changed)
- queue_work(system_long_wq, &target->remove_work);
+ queue_work(srp_remove_wq, &target->remove_work);
return changed;
}
@@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
SCSI_SENSE_BUFFERSIZE));
}
- if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
- scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
- else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+ if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
+ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
srp_free_req(target, req, scmnd,
be32_to_cpu(rsp->req_lim_delta));
@@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device)
spin_unlock(&host->target_lock);
/*
- * Wait for target port removal tasks.
+ * Wait for tl_err and target port removal tasks.
*/
flush_workqueue(system_long_wq);
+ flush_workqueue(srp_remove_wq);
kfree(host);
}
@@ -3313,16 +3319,22 @@ static int __init srp_init_module(void)
indirect_sg_entries = cmd_sg_entries;
}
+ srp_remove_wq = create_workqueue("srp_remove");
+ if (!srp_remove_wq) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = -ENOMEM;
ib_srp_transport_template =
srp_attach_transport(&ib_srp_transport_functions);
if (!ib_srp_transport_template)
- return -ENOMEM;
+ goto destroy_wq;
ret = class_register(&srp_class);
if (ret) {
pr_err("couldn't register class infiniband_srp\n");
- srp_release_transport(ib_srp_transport_template);
- return ret;
+ goto release_tr;
}
ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3342,22 @@ static int __init srp_init_module(void)
ret = ib_register_client(&srp_client);
if (ret) {
pr_err("couldn't register IB client\n");
- srp_release_transport(ib_srp_transport_template);
- ib_sa_unregister_client(&srp_sa_client);
- class_unregister(&srp_class);
- return ret;
+ goto unreg_sa;
}
- return 0;
+out:
+ return ret;
+
+unreg_sa:
+ ib_sa_unregister_client(&srp_sa_client);
+ class_unregister(&srp_class);
+
+release_tr:
+ srp_release_transport(ib_srp_transport_template);
+
+destroy_wq:
+ destroy_workqueue(srp_remove_wq);
+ goto out;
}
static void __exit srp_cleanup_module(void)
@@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void)
ib_sa_unregister_client(&srp_sa_client);
class_unregister(&srp_class);
srp_release_transport(ib_srp_transport_template);
+ destroy_workqueue(srp_remove_wq);
}
module_init(srp_init_module);
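
Two behavioural notes on the srp changes above: srp_process_rsp() now distinguishes underflow from overflow, recording an overflow as a negative residual (for example, a DIOVER response carrying data_in_res_cnt of 512 results in scsi_set_resid(scmnd, -512); the numbers are illustrative); and target removal work runs on the private srp_remove workqueue, with srp_init_module() unwinding through destroy_workqueue() on every failure path.
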
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f2788b1..d28a8c284da 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
case IB_EVENT_PKEY_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
+ case IB_EVENT_GID_CHANGE:
/* Refresh port data asynchronously. */
if (event->element.port_num <= sdev->device->phys_port_cnt) {
sport = &sdev->port[event->element.port_num - 1];
@@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
&reg_req, 0,
srpt_mad_send_handler,
srpt_mad_recv_handler,
- sport);
+ sport, 0);
if (IS_ERR(sport->mad_agent)) {
ret = PTR_ERR(sport->mad_agent);
sport->mad_agent = NULL;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 443d03fbac4..8eeab72b93e 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
mutex_unlock(&ca->set->bucket_lock);
blkdev_issue_discard(ca->bdev,
bucket_to_sector(ca->set, bucket),
- ca->sb.block_size, GFP_KERNEL, 0);
+ ca->sb.bucket_size, GFP_KERNEL, 0);
mutex_lock(&ca->set->bucket_lock);
}
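
The allocator previously issued the discard for only ca->sb.block_size sectors of each bucket being reused; with, say, 1024-sector buckets and 8-sector blocks (illustrative numbers), that trims under one percent of the space actually freed. Passing ca->sb.bucket_size discards the whole bucket.
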
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d2ebcf32309..04f7bc28ef8 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,9 +477,13 @@ struct gc_stat {
* CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
* we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
* flushing dirty data).
+ *
+ * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * replay is complete.
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
+#define CACHE_SET_RUNNING 2
struct cache_set {
struct closure cl;
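
CACHE_SET_RUNNING is consumed later in this series: run_cache_set() sets it once registration and journal replay have finished, and bch_flash_dev_create() refuses with -EPERM until then (see the super.c hunks below), closing the window where a flash-only volume could be created against a half-initialised cache set.
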
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 54541641530..646fe85261c 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
{
uint64_t start_time;
bool used_mempool = false;
- struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
+ struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
order);
if (!out) {
struct page *outp;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 5f6728d5d4d..ae964624efb 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
{
return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
KEY_PTRS(l) == KEY_PTRS(r) &&
- KEY_CSUM(l) == KEY_CSUM(l));
+ KEY_CSUM(l) == KEY_CSUM(r));
}
/* Keylists */
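
The bch_bkey_equal_header() fix above replaces a tautology: comparing KEY_CSUM(l) with itself was always true, so keys whose checksum types differed were wrongly treated as having equal headers.
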
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7347b610096..00cde40db57 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,9 +117,9 @@
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
- struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
+ struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+ _w, b); \
if (!IS_ERR(_child)) { \
- _child->parent = (b); \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
rw_unlock(_w, _child); \
} else \
@@ -142,7 +142,6 @@
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) { \
- _b->parent = NULL; \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
@@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
struct bset *i = btree_bset_first(b);
struct btree_iter *iter;
- iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+ iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->used = 0;
@@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
bset_sector_offset(&b->keys, i));
- if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
+ if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
int j;
struct bio_vec *bv;
void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -967,7 +966,8 @@ err:
* level and op->lock.
*/
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
- struct bkey *k, int level, bool write)
+ struct bkey *k, int level, bool write,
+ struct btree *parent)
{
int i = 0;
struct btree *b;
@@ -1002,6 +1002,7 @@ retry:
BUG_ON(b->level != level);
}
+ b->parent = parent;
b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@@ -1022,15 +1023,16 @@ retry:
return b;
}
-static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
+static void btree_node_prefetch(struct btree *parent, struct bkey *k)
{
struct btree *b;
- mutex_lock(&c->bucket_lock);
- b = mca_alloc(c, NULL, k, level);
- mutex_unlock(&c->bucket_lock);
+ mutex_lock(&parent->c->bucket_lock);
+ b = mca_alloc(parent->c, NULL, k, parent->level - 1);
+ mutex_unlock(&parent->c->bucket_lock);
if (!IS_ERR_OR_NULL(b)) {
+ b->parent = parent;
bch_btree_node_read(b);
rw_unlock(true, b);
}
@@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
mutex_unlock(&b->c->bucket_lock);
}
-struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
- int level)
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ int level, bool wait,
+ struct btree *parent)
{
BKEY_PADDED(key) k;
struct btree *b = ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
retry:
- if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
+ if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
goto err;
bkey_put(c, &k.key);
@@ -1085,6 +1088,7 @@ retry:
}
b->accessed = 1;
+ b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
mutex_unlock(&c->bucket_lock);
@@ -1096,14 +1100,21 @@ err_free:
err:
mutex_unlock(&c->bucket_lock);
- trace_bcache_btree_node_alloc_fail(b);
+ trace_bcache_btree_node_alloc_fail(c);
return b;
}
+static struct btree *bch_btree_node_alloc(struct cache_set *c,
+ struct btree_op *op, int level,
+ struct btree *parent)
+{
+ return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
+}
+
static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
- struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
+ struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
BUG_ON(btree_bset_first(new_nodes[0])->keys);
btree_node_free(new_nodes[0]);
rw_unlock(true, new_nodes[0]);
+ new_nodes[0] = NULL;
for (i = 0; i < nodes; i++) {
if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
- true);
+ true, b);
if (IS_ERR(r->b)) {
ret = PTR_ERR(r->b);
break;
@@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
if (k)
- btree_node_prefetch(b->c, k, b->level - 1);
+ btree_node_prefetch(b, k);
if (p)
ret = btree(check_recurse, p, b, op);
@@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
- n2 = bch_btree_node_alloc(b->c, op, b->level);
+ n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
if (IS_ERR(n2))
goto err_free1;
if (!b->parent) {
- n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
+ n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
if (IS_ERR(n3))
goto err_free2;
}
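
The btree changes thread the parent pointer through node lookup and allocation instead of having callers patch it up afterwards: bch_btree_node_get() and __bch_btree_node_alloc() now take the parent and set b->parent themselves, and btree_node_prefetch() takes the parent node directly, so even prefetched nodes come back with a valid parent. The new call shape, as used in btree_gc_recurse() above:

        /* Sketch of the new lookup signature; the last argument is the parent. */
        r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b);
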
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 91dfa5e6968..5c391fa01be 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
-struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
+struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
+ int, bool, struct btree *);
struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
- struct bkey *, int, bool);
+ struct bkey *, int, bool, struct btree *);
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 3a0de4cf977..243de0bf15c 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -474,9 +474,8 @@ out:
return false;
}
-static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
- struct btree *b = container_of(bk, struct btree, keys);
char buf[80];
if (!KEY_SIZE(k))
@@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
if (KEY_SIZE(k) > KEY_OFFSET(k))
goto bad;
- if (__ptr_invalid(b->c, k))
+ if (__ptr_invalid(c, k))
goto bad;
return false;
bad:
bch_extent_to_text(buf, sizeof(buf), k);
- cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+ cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
return true;
}
+static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+ struct btree *b = container_of(bk, struct btree, keys);
+ return __bch_extent_invalid(b->c, k);
+}
+
static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
unsigned ptr)
{
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e4e23409782..e2ed54054e7 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -9,5 +9,6 @@ struct cache_set;
void bch_extent_to_text(char *, size_t, const struct bkey *);
bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 59e82021b5b..fe080ad0e55 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -7,6 +7,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
+#include "extents.h"
#include <trace/events/bcache.h>
@@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
if (read_bucket(l))
goto bsearch;
- if (list_empty(list))
+ /* no journal entries on this device? */
+ if (l == ca->sb.njournal_buckets)
continue;
bsearch:
+ BUG_ON(list_empty(list));
+
/* Binary search */
- m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+ m = l;
+ r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
pr_debug("starting binary search, l %u r %u", l, r);
while (l + 1 < r) {
@@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
for (k = i->j.start;
k < bset_bkey_last(&i->j);
- k = bkey_next(k)) {
- unsigned j;
+ k = bkey_next(k))
+ if (!__bch_extent_invalid(c, k)) {
+ unsigned j;
- for (j = 0; j < KEY_PTRS(k); j++)
- if (ptr_available(c, k, j))
- atomic_inc(&PTR_BUCKET(c, k, j)->pin);
+ for (j = 0; j < KEY_PTRS(k); j++)
+ if (ptr_available(c, k, j))
+ atomic_inc(&PTR_BUCKET(c, k, j)->pin);
- bch_initial_mark_key(c, 0, k);
- }
+ bch_initial_mark_key(c, 0, k);
+ }
}
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15fff4f68a7..62e6e98186b 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
{
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- trace_bcache_write(op->bio, op->writeback, op->bypass);
+ trace_bcache_write(op->c, op->inode, op->bio,
+ op->writeback, op->bypass);
bch_keylist_init(&op->insert_keys);
bio_get(op->bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 926ded8ccbf..d4713d098a3 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
unsigned id)
{
- BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
-
d->id = id;
d->c = c;
c->devices[id] = d;
@@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
list_move(&dc->list, &uncached_devices);
clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
+ clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
mutex_unlock(&bch_register_lock);
@@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
*/
atomic_set(&dc->count, 1);
+ if (bch_cached_dev_writeback_start(dc))
+ return -ENOMEM;
+
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(dc);
atomic_set(&dc->has_dirty, 1);
@@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl)
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
cancel_delayed_work_sync(&dc->writeback_rate_update);
- kthread_stop(dc->writeback_thread);
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ kthread_stop(dc->writeback_thread);
mutex_lock(&bch_register_lock);
@@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
- if (!IS_ERR_OR_NULL(dc->bdev)) {
- if (dc->bdev->bd_disk)
- blk_sync_queue(bdev_get_queue(dc->bdev));
-
+ if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- }
wake_up(&unregister_wait);
@@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
static void flash_dev_free(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ mutex_lock(&bch_register_lock);
bcache_device_free(d);
+ mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
}
@@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ mutex_lock(&bch_register_lock);
bcache_device_unlink(d);
+ mutex_unlock(&bch_register_lock);
kobject_del(&d->kobj);
continue_at(cl, flash_dev_free, system_wq);
}
@@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
if (test_bit(CACHE_SET_STOPPING, &c->flags))
return -EINTR;
+ if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+ return -EPERM;
+
u = uuid_find_empty(c);
if (!u) {
pr_err("Can't create volume, no room for UUID");
@@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
bch_journal_free(c);
for_each_cache(ca, c, i)
- if (ca)
+ if (ca) {
+ ca->set = NULL;
+ c->cache[ca->sb.nr_this_dev] = NULL;
kobject_put(&ca->kobj);
+ }
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
if (ca->alloc_thread)
kthread_stop(ca->alloc_thread);
- cancel_delayed_work_sync(&c->journal.work);
- /* flush last journal entry if needed */
- c->journal.work.work.func(&c->journal.work.work);
+ if (c->journal.cur) {
+ cancel_delayed_work_sync(&c->journal.work);
+ /* flush last journal entry if needed */
+ c->journal.work.work.func(&c->journal.work.work);
+ }
closure_return(cl);
}
@@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
- c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
+ c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "cannot allocate new btree root";
- c->root = bch_btree_node_alloc(c, NULL, 0);
+ c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
flash_devs_run(c);
+ set_bit(CACHE_SET_RUNNING, &c->flags);
return;
err:
closure_sync(&cl);
@@ -1760,6 +1772,7 @@ found:
pr_debug("set version = %llu", c->sb.version);
}
+ kobject_get(&ca->kobj);
ca->set = c;
ca->set->cache[ca->sb.nr_this_dev] = ca;
c->cache_by_alloc[c->caches_loaded++] = ca;
@@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
struct cache *ca = container_of(kobj, struct cache, kobj);
unsigned i;
- if (ca->set)
+ if (ca->set) {
+ BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
ca->set->cache[ca->sb.nr_this_dev] = NULL;
+ }
bio_split_pool_free(&ca->bio_split_hook);
@@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
if (ca->sb_bio.bi_inline_vecs[0].bv_page)
put_page(ca->sb_bio.bi_io_vec[0].bv_page);
- if (!IS_ERR_OR_NULL(ca->bdev)) {
- blk_sync_queue(bdev_get_queue(ca->bdev));
+ if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- }
kfree(ca);
module_put(THIS_MODULE);
@@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
}
static void register_cache(struct cache_sb *sb, struct page *sb_page,
- struct block_device *bdev, struct cache *ca)
+ struct block_device *bdev, struct cache *ca)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
@@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
pr_info("registered cache device %s", bdevname(bdev, name));
+out:
+ kobject_put(&ca->kobj);
return;
err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
- kobject_put(&ca->kobj);
+ goto out;
}
/* Global interfaces/init */
@@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
bdev = lookup_bdev(strim(path));
+ mutex_lock(&bch_register_lock);
if (!IS_ERR(bdev) && bch_is_open(bdev))
err = "device already registered";
else
err = "device busy";
+ mutex_unlock(&bch_register_lock);
}
goto err;
}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ac7d0d1f70d..98df7572b5f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -416,8 +416,8 @@ do { \
average_frequency, frequency_units); \
__print_time_stat(stats, name, \
average_duration, duration_units); \
- __print_time_stat(stats, name, \
- max_duration, duration_units); \
+ sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
+ div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
\
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
? div_s64(local_clock() - (stats)->last, \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f4300e4c011..f1986bcd1bf 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
if (KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)
while (!kthread_should_stop() && delay)
- delay = schedule_timeout_uninterruptible(delay);
+ delay = schedule_timeout_interruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
@@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_uninterruptible(delay);
+ delay = schedule_timeout_interruptible(delay);
}
}
@@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
}
-int bch_cached_dev_writeback_init(struct cached_dev *dc)
+void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
sema_init(&dc->in_flight, 64);
init_rwsem(&dc->writeback_lock);
@@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_d_term = 30;
dc->writeback_rate_p_term_inverse = 6000;
+ INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
+}
+
+int bch_cached_dev_writeback_start(struct cached_dev *dc)
+{
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
if (IS_ERR(dc->writeback_thread))
return PTR_ERR(dc->writeback_thread);
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
+ bch_writeback_queue(dc);
+
return 0;
}
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e2f8598937a..0a9dab187b7 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
void bch_sectors_dirty_init(struct cached_dev *dc);
-int bch_cached_dev_writeback_init(struct cached_dev *);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+int bch_cached_dev_writeback_start(struct cached_dev *);
#endif
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d2899e7eb3a..06709257add 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
- disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
+ disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0);
@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
bool may_format_device)
{
int r;
- cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
+ cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_METADATA_CACHE_SIZE,
CACHE_MAX_CONCURRENT_LOCKS);
if (IS_ERR(cmd->bm)) {
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd70a78623a..7383c90ccdb 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@
#include "dm-cache-block-types.h"
#include "dm-cache-policy-internal.h"
+#include "persistent-data/dm-space-map-metadata.h"
/*----------------------------------------------------------------*/
-#define DM_CACHE_METADATA_BLOCK_SIZE 4096
+#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
/* FIXME: remove this restriction */
/*
* The metadata device is currently limited in size.
- *
- * We have one block of index, which can hold 255 index entries. Each
- * index entry contains allocation info about 16k metadata blocks.
*/
-#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
+#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
/*
* A metadata device larger than 16GB triggers a warning.
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326638b..1af40ee209e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}
+/*
+ * You must increment the deferred set whilst the prison cell is held. To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+ struct dm_bio_prison_cell *cell)
+{
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+ BUG_ON(!cell);
+ BUG_ON(pb->all_io_entry);
+
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
static void issue(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+ inc_ds(cache, bio, cell);
+ issue(cache, bio);
+}
+
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+ /*
+ * No need to inc_ds() here, since the cell will be held for the
+ * duration of the io.
+ */
generic_make_request(bio);
}
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
return;
INIT_LIST_HEAD(&work);
- if (pb->all_io_entry)
- dm_deferred_entry_dec(pb->all_io_entry, &work);
+ dm_deferred_entry_dec(pb->all_io_entry, &work);
if (!list_empty(&work))
queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
else
remap_to_cache(cache, bio, 0);
+ /*
+ * REQ_FLUSH is not directed at any particular block so we don't
+ * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH
+ * by dm-core.
+ */
issue(cache, bio);
}
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
- struct per_bio_data *pb,
- dm_oblock_t oblock, dm_cblock_t cblock)
-{
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
- remap_to_cache_dirty(cache, bio, oblock, cblock);
- issue(cache, bio);
-}
-
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
- size_t pb_data_size = get_per_bio_data_size(cache);
- struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
bool passthrough = passthrough_mode(&cache->features);
bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
} else {
/* FIXME: factor out issue_origin() */
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
}
} else {
inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
if (bio_data_dir(bio) == WRITE &&
writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock)) {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- issue(cache, bio);
- } else
- issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+
+ } else {
+ remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ inc_and_issue(cache, bio, new_ocell);
+ }
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
- issue(cache, bio);
+ inc_and_issue(cache, bio, new_ocell);
break;
case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
bio_list_init(&cache->deferred_flush_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);
+ /*
+ * These bios have already been through inc_ds()
+ */
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)
if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false);
+ process_migrations(cache, &cache->need_commit_migrations, migration_failure);
/*
* FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ out:
return r;
}
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
{
- struct cache *cache = ti->private;
-
int r;
dm_oblock_t block = get_bio_block(cache, bio);
size_t pb_data_size = get_per_bio_data_size(cache);
bool can_migrate = false;
bool discarded_block;
- struct dm_bio_prison_cell *cell;
struct policy_result lookup_result;
struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
/*
* Check to see if that block is currently migrating.
*/
- cell = alloc_prison_cell(cache);
- if (!cell) {
+ *cell = alloc_prison_cell(cache);
+ if (!*cell) {
defer_bio(cache, bio);
return DM_MAPIO_SUBMITTED;
}
- r = bio_detain(cache, block, bio, cell,
+ r = bio_detain(cache, block, bio, *cell,
(cell_free_fn) free_prison_cell,
- cache, &cell);
+ cache, cell);
if (r) {
if (r < 0)
defer_bio(cache, bio);
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
bio, &lookup_result);
if (r == -EWOULDBLOCK) {
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
return DM_MAPIO_SUBMITTED;
} else if (r) {
DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
* We need to invalidate this block, so
* defer for the worker thread.
*/
- cell_defer(cache, cell, true);
+ cell_defer(cache, *cell, true);
r = DM_MAPIO_SUBMITTED;
} else {
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
inc_miss_counter(cache, bio);
remap_to_origin_clear_discard(cache, bio, block);
-
- cell_defer(cache, cell, false);
}
} else {
inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock))
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
else
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
- cell_defer(cache, cell, false);
}
break;
case POLICY_MISS:
inc_miss_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
if (pb->req_nr != 0) {
/*
* This is a duplicate writethrough io that is no
* longer needed because the block has been demoted.
*/
bio_endio(bio, 0);
- cell_defer(cache, cell, false);
- return DM_MAPIO_SUBMITTED;
- } else {
+ cell_defer(cache, *cell, false);
+ r = DM_MAPIO_SUBMITTED;
+
+ } else
remap_to_origin_clear_discard(cache, bio, block);
- cell_defer(cache, cell, false);
- }
+
break;
default:
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
+ cell_defer(cache, *cell, false);
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
}
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+ int r;
+ struct dm_bio_prison_cell *cell;
+ struct cache *cache = ti->private;
+
+ r = __cache_map(cache, bio, &cell);
+ if (r == DM_MAPIO_REMAPPED) {
+ inc_ds(cache, bio, cell);
+ cell_defer(cache, cell, false);
+ }
+
+ return r;
+}
+
static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
residency = policy_residency(cache->policy);
DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
- (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
+ (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
(unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
(unsigned long long)nr_blocks_metadata,
cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < cache->sectors_per_block ||
do_div(io_opt_sectors, cache->sectors_per_block)) {
- blk_limits_io_min(limits, 0);
+ blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
}
set_discard_limits(cache, limits);
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4cba2d808af..2785007e0e4 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,7 +59,7 @@ struct dm_crypt_io {
int error;
sector_t sector;
struct dm_crypt_io *base_io;
-};
+} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
struct convert_context *ctx;
@@ -162,6 +162,8 @@ struct crypt_config {
*/
unsigned int dmreq_start;
+ unsigned int per_bio_data_size;
+
unsigned long flags;
unsigned int key_size;
unsigned int key_parts; /* independent parts in key buffer */
@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc,
kcryptd_async_done, dmreq_of_req(cc, ctx->req));
}
+static void crypt_free_req(struct crypt_config *cc,
+ struct ablkcipher_request *req, struct bio *base_bio)
+{
+ struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+
+ if ((struct ablkcipher_request *)(io + 1) != req)
+ mempool_free(req, cc->req_pool);
+}
+
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
}
}
-static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
- struct bio *bio, sector_t sector)
+static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
+ struct bio *bio, sector_t sector)
{
- struct dm_crypt_io *io;
-
- io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->cc = cc;
io->base_bio = bio;
io->sector = sector;
@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
io->base_io = NULL;
io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
-
- return io;
}
static void crypt_inc_pending(struct dm_crypt_io *io)
@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
return;
if (io->ctx.req)
- mempool_free(io->ctx.req, cc->req_pool);
- mempool_free(io, cc->io_pool);
+ crypt_free_req(cc, io->ctx.req, base_bio);
+ if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
+ mempool_free(io, cc->io_pool);
if (likely(!base_io))
bio_endio(base_bio, error);
@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* between fragments, so switch to a new dm_crypt_io structure.
*/
if (unlikely(!crypt_finished && remaining)) {
- new_io = crypt_io_alloc(io->cc, io->base_bio,
- sector);
+ new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
+ crypt_io_init(new_io, io->cc, io->base_bio, sector);
crypt_inc_pending(new_io);
crypt_convert_init(cc, &new_io->ctx, NULL,
io->base_bio, sector);
@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error < 0)
io->error = -EIO;
- mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+ crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending))
return;
@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ cc->per_bio_data_size = ti->per_bio_data_size =
+ sizeof(struct dm_crypt_io) + cc->dmreq_start +
+ sizeof(struct dm_crypt_request) + cc->iv_size;
+
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
- io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io = dm_per_bio_data(bio, cc->per_bio_data_size);
+ crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io->ctx.req = (struct ablkcipher_request *)(io + 1);
if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT))
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index db404a0f7e2..c09359db3a9 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -33,7 +33,6 @@ struct dm_io_client {
struct io {
unsigned long error_bits;
atomic_t count;
- struct completion *wait;
struct dm_io_client *client;
io_notify_fn callback;
void *context;
@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
* We need an io object to keep track of the number of bios that
* have been dispatched for a particular io.
*---------------------------------------------------------------*/
-static void dec_count(struct io *io, unsigned int region, int error)
+static void complete_io(struct io *io)
{
- if (error)
- set_bit(region, &io->error_bits);
+ unsigned long error_bits = io->error_bits;
+ io_notify_fn fn = io->callback;
+ void *context = io->context;
- if (atomic_dec_and_test(&io->count)) {
- if (io->vma_invalidate_size)
- invalidate_kernel_vmap_range(io->vma_invalidate_address,
- io->vma_invalidate_size);
+ if (io->vma_invalidate_size)
+ invalidate_kernel_vmap_range(io->vma_invalidate_address,
+ io->vma_invalidate_size);
- if (io->wait)
- complete(io->wait);
+ mempool_free(io, io->client->pool);
+ fn(error_bits, context);
+}
- else {
- unsigned long r = io->error_bits;
- io_notify_fn fn = io->callback;
- void *context = io->context;
+static void dec_count(struct io *io, unsigned int region, int error)
+{
+ if (error)
+ set_bit(region, &io->error_bits);
- mempool_free(io, io->client->pool);
- fn(r, context);
- }
- }
+ if (atomic_dec_and_test(&io->count))
+ complete_io(io);
}
static void endio(struct bio *bio, int error)
@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions,
dec_count(io, 0, 0);
}
+struct sync_io {
+ unsigned long error_bits;
+ struct completion wait;
+};
+
+static void sync_io_complete(unsigned long error, void *context)
+{
+ struct sync_io *sio = context;
+
+ sio->error_bits = error;
+ complete(&sio->wait);
+}
+
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
- /*
- * gcc <= 4.3 can't do the alignment for stack variables, so we must
- * align it on our own.
- * volatile prevents the optimizer from removing or reusing
- * "io_" field from the stack frame (allowed in ANSI C).
- */
- volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
- struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
- DECLARE_COMPLETION_ONSTACK(wait);
+ struct io *io;
+ struct sync_io sio;
if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
return -EIO;
}
+ init_completion(&sio.wait);
+
+ io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
- io->wait = &wait;
io->client = client;
+ io->callback = sync_io_complete;
+ io->context = &sio;
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
dispatch_io(rw, num_regions, where, dp, io, 1);
- wait_for_completion_io(&wait);
+ wait_for_completion_io(&sio.wait);
if (error_bits)
- *error_bits = io->error_bits;
+ *error_bits = sio.error_bits;
- return io->error_bits ? -EIO : 0;
+ return sio.error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
- io->wait = NULL;
io->client = client;
io->callback = fn;
io->context = context;
@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
* New collapsed (a)synchronous interface.
*
* If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
- * the queue with blk_unplug() some time later or set REQ_SYNC in
-io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
- * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
+ * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
+ * If you fail to do one of these, the IO will be submitted to the disk after
+ * q->unplug_delay, which defaults to 3ms in blk-settings.c.
*/
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
struct dm_io_region *where, unsigned long *sync_error_bits)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f4167b013d9..833d7e752f0 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m)
dm_noflush_suspending(m->ti)));
}
-#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required)
-
/*
* Map cloned requests
*/
@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
if (!__must_push_back(m))
r = -EIO; /* Failed */
goto out_unlock;
- }
- if (!pg_ready(m)) {
+ } else if (m->queue_io || m->pg_init_required) {
__pg_init_all_paths(m);
goto out_unlock;
}
+
if (set_mapinfo(m, map_context) < 0)
/* ENOMEM, requeue */
goto out_unlock;
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 09a688b3d48..50fca469caf 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr
*bit *= sctx->region_table_entry_bits;
}
+static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
+{
+ unsigned long region_index;
+ unsigned bit;
+
+ switch_get_position(sctx, region_nr, &region_index, &bit);
+
+ return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
+ ((1 << sctx->region_table_entry_bits) - 1);
+}
+
/*
* Find which path to use at given offset.
*/
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{
- unsigned long region_index;
- unsigned bit, path_nr;
+ unsigned path_nr;
sector_t p;
p = offset;
@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
else
sector_div(p, sctx->region_size);
- switch_get_position(sctx, p, &region_index, &bit);
- path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
- ((1 << sctx->region_table_entry_bits) - 1);
+ path_nr = switch_region_table_read(sctx, p);
/* This can only happen if the processor uses non-atomic stores. */
if (unlikely(path_nr >= sctx->nr_paths))
@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string)
}
static int process_set_region_mappings(struct switch_ctx *sctx,
- unsigned argc, char **argv)
+ unsigned argc, char **argv)
{
unsigned i;
unsigned long region_index = 0;
@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
unsigned long path_nr;
const char *string = argv[i];
+ if ((*string & 0xdf) == 'R') {
+ unsigned long cycle_length, num_write;
+
+ string++;
+ if (unlikely(*string == ',')) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ cycle_length = parse_hex(&string);
+ if (unlikely(*string != ',')) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ string++;
+ if (unlikely(!*string)) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+ num_write = parse_hex(&string);
+ if (unlikely(*string)) {
+ DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+ return -EINVAL;
+ }
+
+ if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
+ DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
+ cycle_length - 1, region_index);
+ return -EINVAL;
+ }
+ if (unlikely(region_index + num_write < region_index) ||
+ unlikely(region_index + num_write >= sctx->nr_regions)) {
+ DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
+ region_index, num_write, sctx->nr_regions);
+ return -EINVAL;
+ }
+
+ while (num_write--) {
+ region_index++;
+ path_nr = switch_region_table_read(sctx, region_index - cycle_length);
+ switch_region_table_write(sctx, region_index, path_nr);
+ }
+
+ continue;
+ }
+
if (*string == ':')
region_index++;
else {
@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti,
static struct target_type switch_target = {
.name = "switch",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = switch_ctr,
.dtr = switch_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f59f1e3e5b..f9c6cb8dbcf 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
return q && !blk_queue_add_random(q);
}
+static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
+}
+
static bool dm_table_all_devices_attribute(struct dm_table *t,
iterate_devices_callout_fn func)
{
@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t)
return true;
}
+static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && blk_queue_discard(q);
+}
+
+static bool dm_table_supports_discards(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i = 0;
+
+ /*
+ * Unless any target used by the table set discards_supported,
+ * require at least one underlying device to support discards.
+ * t->devices includes internal dm devices such as mirror logs
+ * so we need to use iterate_devices here, which targets
+ * supporting discard selectively must provide.
+ */
+ while (i < dm_table_get_num_targets(t)) {
+ ti = dm_table_get_target(t, i++);
+
+ if (!ti->num_discard_bios)
+ continue;
+
+ if (ti->discards_supported)
+ return 1;
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_discard_capable, NULL))
+ return 1;
+ }
+
+ return 0;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
+ if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
+ queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ else
+ queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+
dm_table_set_integrity(t);
/*
@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);
-static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return q && blk_queue_discard(q);
-}
-
-bool dm_table_supports_discards(struct dm_table *t)
-{
- struct dm_target *ti;
- unsigned i = 0;
-
- /*
- * Unless any target used by the table set discards_supported,
- * require at least one underlying device to support discards.
- * t->devices includes internal dm devices such as mirror logs
- * so we need to use iterate_devices here, which targets
- * supporting discard selectively must provide.
- */
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
-
- if (!ti->num_discard_bios)
- continue;
-
- if (ti->discards_supported)
- return 1;
-
- if (ti->type->iterate_devices &&
- ti->type->iterate_devices(ti, device_discard_capable, NULL))
- return 1;
- }
-
- return 0;
-}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c..4843801173f 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c {
struct list_head list;
struct dm_dev *pool_dev;
struct dm_dev *origin_dev;
+ sector_t origin_size;
dm_thin_id dev_id;
struct pool *pool;
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
struct dm_thin_new_mapping {
struct list_head list;
- bool quiesced:1;
- bool prepared:1;
bool pass_discard:1;
bool definitely_not_shared:1;
+ /*
+ * Track quiescing, copying and zeroing preparation actions. When this
+ * counter hits zero the block is prepared and can be inserted into the
+ * btree.
+ */
+ atomic_t prepare_actions;
+
int err;
struct thin_c *tc;
dm_block_t virt_block;
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
bio_end_io_t *saved_bi_end_io;
};
-static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
+static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
struct pool *pool = m->tc->pool;
- if (m->quiesced && m->prepared) {
+ if (atomic_dec_and_test(&m->prepare_actions)) {
list_add_tail(&m->list, &pool->prepared_mappings);
wake_worker(pool);
}
}
-static void copy_complete(int read_err, unsigned long write_err, void *context)
+static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
unsigned long flags;
- struct dm_thin_new_mapping *m = context;
struct pool *pool = m->tc->pool;
- m->err = read_err || write_err ? -EIO : 0;
-
spin_lock_irqsave(&pool->lock, flags);
- m->prepared = true;
- __maybe_add_mapping(m);
+ __complete_mapping_preparation(m);
spin_unlock_irqrestore(&pool->lock, flags);
}
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+ struct dm_thin_new_mapping *m = context;
+
+ m->err = read_err || write_err ? -EIO : 0;
+ complete_mapping_preparation(m);
+}
+
static void overwrite_endio(struct bio *bio, int err)
{
- unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct dm_thin_new_mapping *m = h->overwrite_mapping;
- struct pool *pool = m->tc->pool;
m->err = err;
-
- spin_lock_irqsave(&pool->lock, flags);
- m->prepared = true;
- __maybe_add_mapping(m);
- spin_unlock_irqrestore(&pool->lock, flags);
+ complete_mapping_preparation(m);
}
/*----------------------------------------------------------------*/
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
return m;
}
+static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
+ sector_t begin, sector_t end)
+{
+ int r;
+ struct dm_io_region to;
+
+ to.bdev = tc->pool_dev->bdev;
+ to.sector = begin;
+ to.count = end - begin;
+
+ r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
+ if (r < 0) {
+ DMERR_LIMIT("dm_kcopyd_zero() failed");
+ copy_complete(1, 1, m);
+ }
+}
+
+/*
+ * A partial copy also needs to zero the uncopied region.
+ */
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_dev *origin, dm_block_t data_origin,
dm_block_t data_dest,
- struct dm_bio_prison_cell *cell, struct bio *bio)
+ struct dm_bio_prison_cell *cell, struct bio *bio,
+ sector_t len)
{
int r;
struct pool *pool = tc->pool;
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
m->data_block = data_dest;
m->cell = cell;
+ /*
+ * quiesce action + copy action + an extra reference held for the
+ * duration of this function (we may need to inc later for a
+ * partial zero).
+ */
+ atomic_set(&m->prepare_actions, 3);
+
if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
- m->quiesced = true;
+ complete_mapping_preparation(m); /* already quiesced */
/*
* IO to pool_dev remaps to the pool target's data_dev.
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
from.bdev = origin->bdev;
from.sector = data_origin * pool->sectors_per_block;
- from.count = pool->sectors_per_block;
+ from.count = len;
to.bdev = tc->pool_dev->bdev;
to.sector = data_dest * pool->sectors_per_block;
- to.count = pool->sectors_per_block;
+ to.count = len;
r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
0, copy_complete, m);
if (r < 0) {
- mempool_free(m, pool->mapping_pool);
DMERR_LIMIT("dm_kcopyd_copy() failed");
- cell_error(pool, cell);
+ copy_complete(1, 1, m);
+
+ /*
+ * We allow the zero to be issued, to simplify the
+ * error path. Otherwise we'd need to start
+ * worrying about decrementing the prepare_actions
+ * counter.
+ */
+ }
+
+ /*
+ * Do we need to zero a tail region?
+ */
+ if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
+ atomic_inc(&m->prepare_actions);
+ ll_zero(tc, m,
+ data_dest * pool->sectors_per_block + len,
+ (data_dest + 1) * pool->sectors_per_block);
}
}
+
+ complete_mapping_preparation(m); /* drop our ref */
}
static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_bio_prison_cell *cell, struct bio *bio)
{
schedule_copy(tc, virt_block, tc->pool_dev,
- data_origin, data_dest, cell, bio);
-}
-
-static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
- dm_block_t data_dest,
- struct dm_bio_prison_cell *cell, struct bio *bio)
-{
- schedule_copy(tc, virt_block, tc->origin_dev,
- virt_block, data_dest, cell, bio);
+ data_origin, data_dest, cell, bio,
+ tc->pool->sectors_per_block);
}
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
struct pool *pool = tc->pool;
struct dm_thin_new_mapping *m = get_next_mapping(pool);
- m->quiesced = true;
- m->prepared = false;
+ atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
m->tc = tc;
m->virt_block = virt_block;
m->data_block = data_block;
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
inc_all_io_entry(pool, bio);
remap_and_issue(tc, bio, data_block);
- } else {
- int r;
- struct dm_io_region to;
- to.bdev = tc->pool_dev->bdev;
- to.sector = data_block * pool->sectors_per_block;
- to.count = pool->sectors_per_block;
+ } else
+ ll_zero(tc, m,
+ data_block * pool->sectors_per_block,
+ (data_block + 1) * pool->sectors_per_block);
+}
- r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
- if (r < 0) {
- mempool_free(m, pool->mapping_pool);
- DMERR_LIMIT("dm_kcopyd_zero() failed");
- cell_error(pool, cell);
- }
- }
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+ dm_block_t data_dest,
+ struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ sector_t virt_block_begin = virt_block * pool->sectors_per_block;
+ sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
+
+ if (virt_block_end <= tc->origin_size)
+ schedule_copy(tc, virt_block, tc->origin_dev,
+ virt_block, data_dest, cell, bio,
+ pool->sectors_per_block);
+
+ else if (virt_block_begin < tc->origin_size)
+ schedule_copy(tc, virt_block, tc->origin_dev,
+ virt_block, data_dest, cell, bio,
+ tc->origin_size - virt_block_begin);
+
+ else
+ schedule_zero(tc, virt_block, data_dest, cell, bio);
}
/*
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
- remap_to_origin_and_issue(tc, bio);
+ if (bio_end_sector(bio) <= tc->origin_size)
+ remap_to_origin_and_issue(tc, bio);
+
+ else if (bio->bi_iter.bi_sector < tc->origin_size) {
+ zero_fill_bio(bio);
+ bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
+ remap_to_origin_and_issue(tc, bio);
+
+ } else {
+ zero_fill_bio(bio);
+ bio_endio(bio, 0);
+ }
} else
provision_block(tc, bio, block, cell);
break;
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < pool->sectors_per_block ||
do_div(io_opt_sectors, pool->sectors_per_block)) {
- blk_limits_io_min(limits, 0);
+ blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
}
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &work, list) {
list_del(&m->list);
- m->quiesced = true;
- __maybe_add_mapping(m);
+ __complete_mapping_preparation(m);
}
spin_unlock_irqrestore(&pool->lock, flags);
}
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
noflush_work(tc, do_noflush_stop);
}
+static int thin_preresume(struct dm_target *ti)
+{
+ struct thin_c *tc = ti->private;
+
+ if (tc->origin_dev)
+ tc->origin_size = get_dev_size(tc->origin_dev->bdev);
+
+ return 0;
+}
+
/*
* <nr mapped sectors> <highest mapped sector>
*/
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
.map = thin_map,
.end_io = thin_endio,
+ .preresume = thin_preresume,
.presuspend = thin_presuspend,
.postsuspend = thin_postsuspend,
.status = thin_status,
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index ed76126aac5..e81d2152fa6 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t);
unsigned dm_table_get_type(struct dm_table *t);
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
-bool dm_table_supports_discards(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index 6352bec8419..71f387ce8cb 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -744,6 +744,7 @@ static struct usb_device_id rtsx_usb_usb_ids[] = {
{ USB_DEVICE(0x0BDA, 0x0140) },
{ }
};
+MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids);
static struct usb_driver rtsx_usb_driver = {
.name = "rtsx_usb",
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 452782bffeb..ede41f05c39 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2028,8 +2028,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
/* complete ongoing async transfer before issuing discard */
if (card->host->areq)
mmc_blk_issue_rw_rq(mq, NULL);
- if (req->cmd_flags & REQ_SECURE &&
- !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
+ if (req->cmd_flags & REQ_SECURE)
ret = mmc_blk_issue_secdiscard_rq(mq, req);
else
ret = mmc_blk_issue_discard_rq(mq, req);
@@ -2432,6 +2431,8 @@ static int mmc_blk_probe(struct mmc_card *card)
if (!(card->csd.cmdclass & CCC_BLOCK_READ))
return -ENODEV;
+ mmc_fixup_device(card, blk_fixups);
+
md = mmc_blk_alloc(card);
if (IS_ERR(md))
return PTR_ERR(md);
@@ -2446,7 +2447,6 @@ static int mmc_blk_probe(struct mmc_card *card)
goto out;
mmc_set_drvdata(card, md);
- mmc_fixup_device(card, blk_fixups);
if (mmc_add_disk(md))
goto out;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index d2dbf02022b..8a1f1240e05 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -180,7 +180,6 @@ static int mmc_bus_resume(struct device *dev)
#endif
#ifdef CONFIG_PM_RUNTIME
-
static int mmc_runtime_suspend(struct device *dev)
{
struct mmc_card *card = mmc_dev_to_card(dev);
@@ -196,17 +195,10 @@ static int mmc_runtime_resume(struct device *dev)
return host->bus_ops->runtime_resume(host);
}
-
-static int mmc_runtime_idle(struct device *dev)
-{
- return 0;
-}
-
#endif /* !CONFIG_PM_RUNTIME */
static const struct dev_pm_ops mmc_bus_pm_ops = {
- SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume,
- mmc_runtime_idle)
+ SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, NULL)
SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume)
};
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 7dc0c85fdb6..d03a080fb9c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2102,7 +2102,8 @@ EXPORT_SYMBOL(mmc_can_sanitize);
int mmc_can_secure_erase_trim(struct mmc_card *card)
{
- if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)
+ if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
+ !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
return 1;
return 0;
}
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 793c6f7ddb0..1eda8dd8c86 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -324,13 +324,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
}
}
+ /*
+ * The EXT_CSD format is meant to be forward compatible. As long
+ * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
+ * are authorized, see JEDEC JESD84-B50 section B.8.
+ */
card->ext_csd.rev = ext_csd[EXT_CSD_REV];
- if (card->ext_csd.rev > 7) {
- pr_err("%s: unrecognised EXT_CSD revision %d\n",
- mmc_hostname(card->host), card->ext_csd.rev);
- err = -EINVAL;
- goto out;
- }
card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index 6c36fccaa1e..dd1d1e0fe32 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -91,7 +91,7 @@ void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table)
(f->cis_device == card->cis.device ||
f->cis_device == (u16) SDIO_ANY_ID) &&
rev >= f->rev_start && rev <= f->rev_end) {
- dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup);
+ dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup);
f->vendor_fixup(card, f->data);
}
}
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 274ef00b446..48d0c93ba25 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -184,6 +184,9 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
mmc_delay(10);
}
+ if (!i)
+ pr_err("%s: card never left busy state\n", mmc_hostname(host));
+
if (rocr && !mmc_host_is_spi(host))
*rocr = cmd.resp[0];
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a5652548230..45113582246 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -290,6 +290,18 @@ config MMC_MOXART
be found on some embedded hardware such as UC-7112-LX.
If you have a controller with this interface, say Y here.
+config MMC_SDHCI_ST
+ tristate "SDHCI support on STMicroelectronics SoC"
+ depends on ARCH_STI
+ depends on MMC_SDHCI_PLTFM
+ select MMC_SDHCI_IO_ACCESSORS
+ help
+ This selects the Secure Digital Host Controller Interface in
+ STMicroelectronics SoCs.
+
+ If you have a controller with this interface, say Y or M here.
+ If unsure, say N.
+
config MMC_OMAP
tristate "TI OMAP Multimedia Card Interface support"
depends on ARCH_OMAP
@@ -303,6 +315,7 @@ config MMC_OMAP
config MMC_OMAP_HS
tristate "TI OMAP High Speed Multimedia Card Interface support"
+ depends on HAS_DMA
depends on ARCH_OMAP2PLUS || COMPILE_TEST
help
This selects the TI OMAP High Speed Multimedia card Interface.
@@ -343,7 +356,7 @@ config MMC_ATMELMCI
config MMC_SDHCI_MSM
tristate "Qualcomm SDHCI Controller Support"
- depends on ARCH_QCOM
+ depends on ARCH_QCOM || (ARM && COMPILE_TEST)
depends on MMC_SDHCI_PLTFM
help
This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -440,6 +453,7 @@ config MMC_SPI
config MMC_S3C
tristate "Samsung S3C SD/MMC Card Interface support"
depends on ARCH_S3C24XX
+ depends on S3C24XX_DMAC
help
This selects a driver for the MCI interface found in
Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
@@ -477,15 +491,6 @@ config MMC_S3C_DMA
working properly and needs to be debugged before this
option is useful.
-config MMC_S3C_PIODMA
- bool "Support for both PIO and DMA"
- help
- Compile both the PIO and DMA transfer routines into the
- driver and let the platform select at run-time which one
- is best.
-
- See notes for the DMA option.
-
endchoice
config MMC_SDRICOH_CS
@@ -623,7 +628,7 @@ config MMC_DW_PCI
config MMC_SH_MMCIF
tristate "SuperH Internal MMCIF support"
- depends on MMC_BLOCK
+ depends on MMC_BLOCK && HAS_DMA
depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
help
This selects the MMC Host Interface controller (MMCIF).
@@ -697,6 +702,7 @@ config MMC_WMT
config MMC_USDHI6ROL0
tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
+ depends on HAS_DMA
help
This selects support for the Renesas USDHI6ROL0 SD/SDIO
Host Controller
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7f81ddf1dd2..f211eede8db 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o
obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o
obj-$(CONFIG_MMC_SDHCI_BCM2835) += sdhci-bcm2835.o
obj-$(CONFIG_MMC_SDHCI_MSM) += sdhci-msm.o
+obj-$(CONFIG_MMC_SDHCI_ST) += sdhci-st.o
ifeq ($(CONFIG_CB710_DEBUG),y)
CFLAGS-cb710-mmc += -DDEBUG
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 1ac227c603b..8f216edbdf0 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -111,8 +111,7 @@ static const u8 tuning_blk_pattern_8bit[] = {
0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
};
-static inline bool dw_mci_fifo_reset(struct dw_mci *host);
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host);
+static bool dw_mci_reset(struct dw_mci *host);
#if defined(CONFIG_DEBUG_FS)
static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -997,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
int gpio_ro = mmc_gpio_get_ro(mmc);
/* Use platform get_ro function, else try on board write protect */
- if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
+ if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) ||
+ (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT))
read_only = 0;
else if (!IS_ERR_VALUE(gpio_ro))
read_only = gpio_ro;
@@ -1235,7 +1235,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
* After an error, there may be data lingering
* in the FIFO
*/
- dw_mci_fifo_reset(host);
+ dw_mci_reset(host);
} else {
data->bytes_xfered = data->blocks * data->blksz;
data->error = 0;
@@ -1352,7 +1352,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
/* CMD error in data command */
if (mrq->cmd->error && mrq->data)
- dw_mci_fifo_reset(host);
+ dw_mci_reset(host);
host->cmd = NULL;
host->data = NULL;
@@ -1963,14 +1963,8 @@ static void dw_mci_work_routine_card(struct work_struct *work)
}
/* Power down slot */
- if (present == 0) {
- /* Clear down the FIFO */
- dw_mci_fifo_reset(host);
-#ifdef CONFIG_MMC_DW_IDMAC
- dw_mci_idmac_reset(host);
-#endif
-
- }
+ if (present == 0)
+ dw_mci_reset(host);
spin_unlock_bh(&host->lock);
@@ -2021,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
/* get quirks */
for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
- if (of_get_property(np, of_slot_quirks[idx].quirk, NULL))
+ if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) {
+ dev_warn(dev, "Slot quirk %s is deprecated\n",
+ of_slot_quirks[idx].quirk);
quirks |= of_slot_quirks[idx].id;
+ }
return quirks;
}
@@ -2208,8 +2205,11 @@ static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
return false;
}
-static inline bool dw_mci_fifo_reset(struct dw_mci *host)
+static bool dw_mci_reset(struct dw_mci *host)
{
+ u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
+ bool ret = false;
+
/*
* Reseting generates a block interrupt, hence setting
* the scatter-gather pointer to NULL.
@@ -2219,15 +2219,60 @@ static inline bool dw_mci_fifo_reset(struct dw_mci *host)
host->sg = NULL;
}
- return dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET);
-}
+ if (host->use_dma)
+ flags |= SDMMC_CTRL_DMA_RESET;
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host)
-{
- return dw_mci_ctrl_reset(host,
- SDMMC_CTRL_FIFO_RESET |
- SDMMC_CTRL_RESET |
- SDMMC_CTRL_DMA_RESET);
+ if (dw_mci_ctrl_reset(host, flags)) {
+ /*
+ * In all cases we clear the RAWINTS register to clear any
+ * interrupts.
+ */
+ mci_writel(host, RINTSTS, 0xFFFFFFFF);
+
+ /* if using dma we wait for dma_req to clear */
+ if (host->use_dma) {
+ unsigned long timeout = jiffies + msecs_to_jiffies(500);
+ u32 status;
+ do {
+ status = mci_readl(host, STATUS);
+ if (!(status & SDMMC_STATUS_DMA_REQ))
+ break;
+ cpu_relax();
+ } while (time_before(jiffies, timeout));
+
+ if (status & SDMMC_STATUS_DMA_REQ) {
+ dev_err(host->dev,
+ "%s: Timeout waiting for dma_req to "
+ "clear during reset\n", __func__);
+ goto ciu_out;
+ }
+
+ /* when using DMA next we reset the fifo again */
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
+ goto ciu_out;
+ }
+ } else {
+ /* if the controller reset bit did clear, then set clock regs */
+ if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
+ dev_err(host->dev, "%s: fifo/dma reset bits didn't "
+ "clear but ciu was reset, doing clock update\n",
+ __func__);
+ goto ciu_out;
+ }
+ }
+
+#if IS_ENABLED(CONFIG_MMC_DW_IDMAC)
+ /* It is also recommended that we reset and reprogram idmac */
+ dw_mci_idmac_reset(host);
+#endif
+
+ ret = true;
+
+ciu_out:
+ /* After a CTRL reset we need to have CIU set clock registers */
+ mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
+
+ return ret;
}
#ifdef CONFIG_OF
@@ -2238,6 +2283,9 @@ static struct dw_mci_of_quirks {
{
.quirk = "broken-cd",
.id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
+ }, {
+ .quirk = "disable-wp",
+ .id = DW_MCI_QUIRK_NO_WRITE_PROTECT,
},
};
@@ -2425,7 +2473,7 @@ int dw_mci_probe(struct dw_mci *host)
}
/* Reset all blocks */
- if (!dw_mci_ctrl_all_reset(host))
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS))
return -ENODEV;
host->dma_ops = host->pdata->dma_ops;
@@ -2612,7 +2660,7 @@ int dw_mci_resume(struct dw_mci *host)
}
}
- if (!dw_mci_ctrl_all_reset(host)) {
+ if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
ret = -ENODEV;
return ret;
}
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 738fa241d05..08fd956d81f 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -129,6 +129,7 @@
#define SDMMC_CMD_INDX(n) ((n) & 0x1F)
/* Status register defines */
#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF)
+#define SDMMC_STATUS_DMA_REQ BIT(31)
/* FIFOTH register defines */
#define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \
((r) & 0xFFF) << 16 | \
@@ -150,6 +151,10 @@
/* Card read threshold */
#define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x))
+/* All ctrl reset bits */
+#define SDMMC_CTRL_ALL_RESET_FLAGS \
+ (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
+
/* Register access macros */
#define mci_readl(dev, reg) \
__raw_readl((dev)->regs + SDMMC_##reg)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7ad463e9741..e4d47070415 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -52,34 +52,53 @@ static unsigned int fmax = 515633;
* struct variant_data - MMCI variant-specific quirks
* @clkreg: default value for MCICLOCK register
* @clkreg_enable: enable value for MMCICLOCK register
+ * @clkreg_8bit_bus_enable: enable value for 8 bit bus
+ * @clkreg_neg_edge_enable: enable value for inverted data/cmd output
* @datalength_bits: number of bits in the MMCIDATALENGTH register
* @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
* is asserted (likewise for RX)
* @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
* is asserted (likewise for RX)
+ * @data_cmd_enable: enable value for data commands.
* @sdio: variant supports SDIO
* @st_clkdiv: true if using an ST-specific clock divider algorithm
+ * @datactrl_mask_ddrmode: ddr mode mask in datactrl register.
* @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register
+ * @blksz_datactrl4: true if Block size is at b4..b16 position in datactrl
+ * register
* @pwrreg_powerup: power up value for MMCIPOWER register
+ * @f_max: maximum clk frequency supported by the controller.
* @signal_direction: input/out direction of bus signals can be indicated
* @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock
* @busy_detect: true if busy detection on dat0 is supported
* @pwrreg_nopower: bits in MMCIPOWER don't control ext. power supply
+ * @explicit_mclk_control: enable explicit mclk control in driver.
+ * @qcom_fifo: enables qcom specific fifo pio read logic.
+ * @reversed_irq_handling: handle data irq before cmd irq.
*/
struct variant_data {
unsigned int clkreg;
unsigned int clkreg_enable;
+ unsigned int clkreg_8bit_bus_enable;
+ unsigned int clkreg_neg_edge_enable;
unsigned int datalength_bits;
unsigned int fifosize;
unsigned int fifohalfsize;
+ unsigned int data_cmd_enable;
+ unsigned int datactrl_mask_ddrmode;
bool sdio;
bool st_clkdiv;
bool blksz_datactrl16;
+ bool blksz_datactrl4;
u32 pwrreg_powerup;
+ u32 f_max;
bool signal_direction;
bool pwrreg_clkgate;
bool busy_detect;
bool pwrreg_nopower;
+ bool explicit_mclk_control;
+ bool qcom_fifo;
+ bool reversed_irq_handling;
};
static struct variant_data variant_arm = {
@@ -87,6 +106,8 @@ static struct variant_data variant_arm = {
.fifohalfsize = 8 * 4,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
+ .reversed_irq_handling = true,
};
static struct variant_data variant_arm_extended_fifo = {
@@ -94,6 +115,7 @@ static struct variant_data variant_arm_extended_fifo = {
.fifohalfsize = 64 * 4,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
};
static struct variant_data variant_arm_extended_fifo_hwfc = {
@@ -102,15 +124,18 @@ static struct variant_data variant_arm_extended_fifo_hwfc = {
.clkreg_enable = MCI_ARM_HWFCEN,
.datalength_bits = 16,
.pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 100000000,
};
static struct variant_data variant_u300 = {
.fifosize = 16 * 4,
.fifohalfsize = 8 * 4,
.clkreg_enable = MCI_ST_U300_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
.datalength_bits = 16,
.sdio = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.pwrreg_nopower = true,
@@ -124,6 +149,7 @@ static struct variant_data variant_nomadik = {
.sdio = true,
.st_clkdiv = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.pwrreg_nopower = true,
@@ -134,10 +160,13 @@ static struct variant_data variant_ux500 = {
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
.clkreg_enable = MCI_ST_UX500_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+ .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
.datalength_bits = 24,
.sdio = true,
.st_clkdiv = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.busy_detect = true,
@@ -149,17 +178,38 @@ static struct variant_data variant_ux500v2 = {
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
.clkreg_enable = MCI_ST_UX500_HWFCEN,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+ .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
+ .datactrl_mask_ddrmode = MCI_ST_DPSM_DDRMODE,
.datalength_bits = 24,
.sdio = true,
.st_clkdiv = true,
.blksz_datactrl16 = true,
.pwrreg_powerup = MCI_PWR_ON,
+ .f_max = 100000000,
.signal_direction = true,
.pwrreg_clkgate = true,
.busy_detect = true,
.pwrreg_nopower = true,
};
+static struct variant_data variant_qcom = {
+ .fifosize = 16 * 4,
+ .fifohalfsize = 8 * 4,
+ .clkreg = MCI_CLK_ENABLE,
+ .clkreg_enable = MCI_QCOM_CLK_FLOWENA |
+ MCI_QCOM_CLK_SELECT_IN_FBCLK,
+ .clkreg_8bit_bus_enable = MCI_QCOM_CLK_WIDEBUS_8,
+ .datactrl_mask_ddrmode = MCI_QCOM_CLK_SELECT_IN_DDR_MODE,
+ .data_cmd_enable = MCI_QCOM_CSPM_DATCMD,
+ .blksz_datactrl4 = true,
+ .datalength_bits = 24,
+ .pwrreg_powerup = MCI_PWR_UP,
+ .f_max = 208000000,
+ .explicit_mclk_control = true,
+ .qcom_fifo = true,
+};
+
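The Qualcomm variant above leans on two of the new knobs at once: f_max caps the source clock at probe time, while explicit_mclk_control makes the driver reprogram that source clock rather than rely on the MMCICLOCK divider. A condensed sketch of the two clock paths, using only fields visible in this patch (illustrative only; the real driver splits this between mmci_set_ios() and mmci_set_clkreg()):

static void sketch_set_bus_clock(struct mmci_host *host, unsigned int desired)
{
	if (host->variant->explicit_mclk_control) {
		/* Qualcomm-style: reprogram the source clock itself */
		if (clk_set_rate(host->clk, desired) == 0)
			host->mclk = clk_get_rate(host->clk);
		host->cclk = host->mclk;	/* bus clock follows the source */
	} else {
		/*
		 * Classic PL18x path: mclk was capped to variant->f_max at
		 * probe and MMCICLOCK divides it down to the desired rate.
		 */
	}
}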
static int mmci_card_busy(struct mmc_host *mmc)
{
struct mmci_host *host = mmc_priv(mmc);
@@ -260,7 +310,9 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
host->cclk = 0;
if (desired) {
- if (desired >= host->mclk) {
+ if (variant->explicit_mclk_control) {
+ host->cclk = host->mclk;
+ } else if (desired >= host->mclk) {
clk = MCI_CLK_BYPASS;
if (variant->st_clkdiv)
clk |= MCI_ST_UX500_NEG_EDGE;
@@ -299,11 +351,11 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
clk |= MCI_4BIT_BUS;
if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
- clk |= MCI_ST_8BIT_BUS;
+ clk |= variant->clkreg_8bit_bus_enable;
if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
- clk |= MCI_ST_UX500_NEG_EDGE;
+ clk |= variant->clkreg_neg_edge_enable;
mmci_write_clkreg(host, clk);
}
@@ -719,7 +771,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
data->bytes_xfered = 0;
clks = (unsigned long long)data->timeout_ns * host->cclk;
- do_div(clks, 1000000000UL);
+ do_div(clks, NSEC_PER_SEC);
timeout = data->timeout_clks + (unsigned int)clks;
@@ -732,6 +784,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
if (variant->blksz_datactrl16)
datactrl = MCI_DPSM_ENABLE | (data->blksz << 16);
+ else if (variant->blksz_datactrl4)
+ datactrl = MCI_DPSM_ENABLE | (data->blksz << 4);
else
datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
@@ -767,7 +821,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
- datactrl |= MCI_ST_DPSM_DDRMODE;
+ datactrl |= variant->datactrl_mask_ddrmode;
/*
* Attempt to use DMA operation mode, if this
@@ -812,7 +866,7 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) {
writel(0, base + MMCICOMMAND);
- udelay(1);
+ mmci_reg_delay(host);
}
c |= cmd->opcode | MCI_CPSM_ENABLE;
@@ -824,6 +878,9 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
if (/*interrupt*/0)
c |= MCI_CPSM_INTERRUPT;
+ if (mmc_cmd_type(cmd) == MMC_CMD_ADTC)
+ c |= host->variant->data_cmd_enable;
+
host->cmd = cmd;
writel(cmd->arg, base + MMCIARGUMENT);
@@ -834,6 +891,10 @@ static void
mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
unsigned int status)
{
+ /* Make sure we have data to handle */
+ if (!data)
+ return;
+
/* First check for errors */
if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
@@ -902,9 +963,17 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
unsigned int status)
{
void __iomem *base = host->base;
- bool sbc = (cmd == host->mrq->sbc);
- bool busy_resp = host->variant->busy_detect &&
- (cmd->flags & MMC_RSP_BUSY);
+ bool sbc, busy_resp;
+
+ if (!cmd)
+ return;
+
+ sbc = (cmd == host->mrq->sbc);
+ busy_resp = host->variant->busy_detect && (cmd->flags & MMC_RSP_BUSY);
+
+ if (!((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
+ MCI_CMDSENT|MCI_CMDRESPEND)))
+ return;
/* Check if we need to wait for busy completion. */
if (host->busy_status && (status & MCI_ST_CARDBUSY))
@@ -957,15 +1026,34 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
}
}
+static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain)
+{
+ return remain - (readl(host->base + MMCIFIFOCNT) << 2);
+}
+
+static int mmci_qcom_get_rx_fifocnt(struct mmci_host *host, u32 status, int r)
+{
+ /*
+ * On Qualcomm SDCC4, only 8 words are used in each burst, so only 8
+ * addresses from the FIFO range should be used.
+ */
+ if (status & MCI_RXFIFOHALFFULL)
+ return host->variant->fifohalfsize;
+ else if (status & MCI_RXDATAAVLBL)
+ return 4;
+
+ return 0;
+}
+
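The two helpers account for FIFO occupancy differently: the generic one derives it from MMCIFIFOCNT, while the Qualcomm one infers a fixed burst size from the status flags instead of reading that register. A worked example with made-up numbers:

/*
 * Hypothetical example: 64 bytes are still to be read and MMCIFIFOCNT
 * reports 12 words yet to arrive from the card, so the generic helper
 * returns 64 - (12 << 2) = 16 bytes ready to drain from the FIFO.
 * The Qualcomm helper instead returns fifohalfsize (32 bytes here) on
 * MCI_RXFIFOHALFFULL, 4 bytes on MCI_RXDATAAVLBL, and 0 otherwise.
 */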
static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain)
{
void __iomem *base = host->base;
char *ptr = buffer;
- u32 status;
+ u32 status = readl(host->base + MMCISTATUS);
int host_remain = host->size;
do {
- int count = host_remain - (readl(base + MMCIFIFOCNT) << 2);
+ int count = host->get_rx_fifocnt(host, status, host_remain);
if (count > remain)
count = remain;
@@ -1132,9 +1220,6 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
spin_lock(&host->lock);
do {
- struct mmc_command *cmd;
- struct mmc_data *data;
-
status = readl(host->base + MMCISTATUS);
if (host->singleirq) {
@@ -1154,16 +1239,13 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
- cmd = host->cmd;
- if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
- MCI_CMDSENT|MCI_CMDRESPEND) && cmd)
- mmci_cmd_irq(host, cmd, status);
-
- data = host->data;
- if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
- MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
- MCI_DATABLOCKEND) && data)
- mmci_data_irq(host, data, status);
+ if (host->variant->reversed_irq_handling) {
+ mmci_data_irq(host, host->data, status);
+ mmci_cmd_irq(host, host->cmd, status);
+ } else {
+ mmci_cmd_irq(host, host->cmd, status);
+ mmci_data_irq(host, host->data, status);
+ }
/* Don't poll for busy completion in irq context. */
if (host->busy_status)
@@ -1296,6 +1378,17 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (!ios->clock && variant->pwrreg_clkgate)
pwr &= ~MCI_PWR_ON;
+ if (host->variant->explicit_mclk_control &&
+ ios->clock != host->clock_cache) {
+ ret = clk_set_rate(host->clk, ios->clock);
+ if (ret < 0)
+ dev_err(mmc_dev(host->mmc),
+ "Error setting clock rate (%d)\n", ret);
+ else
+ host->mclk = clk_get_rate(host->clk);
+ }
+ host->clock_cache = ios->clock;
+
spin_lock_irqsave(&host->lock, flags);
mmci_set_clkreg(host, ios->clock);
@@ -1443,6 +1536,11 @@ static int mmci_probe(struct amba_device *dev,
if (ret)
goto host_free;
+ if (variant->qcom_fifo)
+ host->get_rx_fifocnt = mmci_qcom_get_rx_fifocnt;
+ else
+ host->get_rx_fifocnt = mmci_get_rx_fifocnt;
+
host->plat = plat;
host->variant = variant;
host->mclk = clk_get_rate(host->clk);
@@ -1451,8 +1549,8 @@ static int mmci_probe(struct amba_device *dev,
* so we try to adjust the clock down to this,
* (if possible).
*/
- if (host->mclk > 100000000) {
- ret = clk_set_rate(host->clk, 100000000);
+ if (host->mclk > variant->f_max) {
+ ret = clk_set_rate(host->clk, variant->f_max);
if (ret < 0)
goto clk_disable;
host->mclk = clk_get_rate(host->clk);
@@ -1471,9 +1569,12 @@ static int mmci_probe(struct amba_device *dev,
* The ARM and ST versions of the block have slightly different
* clock divider equations which means that the minimum divider
* differs too.
* On Qualcomm-like controllers, get the nearest minimum clock to 100 kHz.
*/
if (variant->st_clkdiv)
mmc->f_min = DIV_ROUND_UP(host->mclk, 257);
+ else if (variant->explicit_mclk_control)
+ mmc->f_min = clk_round_rate(host->clk, 100000);
else
mmc->f_min = DIV_ROUND_UP(host->mclk, 512);
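The fixed divisors follow from the divider equations of the two flavours (assumed here from the PL180 documentation rather than stated in this patch: ARM f = mclk / (2 * (div + 1)), ST f = mclk / (div + 2), both with an 8-bit divider). With the historical 100 MHz cap that gives roughly:

/*
 * Worked example, assuming mclk was capped at 100 MHz:
 *
 *   ARM:  f_min = 100000000 / (2 * (255 + 1)) = 100000000 / 512 ~= 195 kHz
 *   ST:   f_min = 100000000 / (255 + 2)       = 100000000 / 257 ~= 389 kHz
 *   Qcom: f_min = clk_round_rate(host->clk, 100000), the rate nearest 100 kHz
 */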
/*
@@ -1483,9 +1584,14 @@ static int mmci_probe(struct amba_device *dev,
* the block, of course.
*/
if (mmc->f_max)
- mmc->f_max = min(host->mclk, mmc->f_max);
+ mmc->f_max = variant->explicit_mclk_control ?
+ min(variant->f_max, mmc->f_max) :
+ min(host->mclk, mmc->f_max);
else
- mmc->f_max = min(host->mclk, fmax);
+ mmc->f_max = variant->explicit_mclk_control ?
+ fmax : min(host->mclk, fmax);
+
+
dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
/* Get regulators and the supported OCR mask */
@@ -1752,6 +1858,12 @@ static struct amba_id mmci_ids[] = {
.mask = 0xf0ffffff,
.data = &variant_ux500v2,
},
+ /* Qualcomm variants */
+ {
+ .id = 0x00051180,
+ .mask = 0x000fffff,
+ .data = &variant_qcom,
+ },
{ 0, 0 },
};
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 347d942d740..a1f5e4f49e2 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -41,6 +41,15 @@
/* Modified PL180 on Versatile Express platform */
#define MCI_ARM_HWFCEN (1 << 12)
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CLK_WIDEBUS_8 (BIT(10) | BIT(11))
+#define MCI_QCOM_CLK_FLOWENA BIT(12)
+#define MCI_QCOM_CLK_INVERTOUT BIT(13)
+
+/* select in latch data and command in */
+#define MCI_QCOM_CLK_SELECT_IN_FBCLK BIT(15)
+#define MCI_QCOM_CLK_SELECT_IN_DDR_MODE (BIT(14) | BIT(15))
+
#define MMCIARGUMENT 0x008
#define MMCICOMMAND 0x00c
#define MCI_CPSM_RESPONSE (1 << 6)
@@ -54,6 +63,14 @@
#define MCI_ST_NIEN (1 << 13)
#define MCI_ST_CE_ATACMD (1 << 14)
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CSPM_DATCMD BIT(12)
+#define MCI_QCOM_CSPM_MCIABORT BIT(13)
+#define MCI_QCOM_CSPM_CCSENABLE BIT(14)
+#define MCI_QCOM_CSPM_CCSDISABLE BIT(15)
+#define MCI_QCOM_CSPM_AUTO_CMD19 BIT(16)
+#define MCI_QCOM_CSPM_AUTO_CMD21 BIT(21)
+
#define MMCIRESPCMD 0x010
#define MMCIRESPONSE0 0x014
#define MMCIRESPONSE1 0x018
@@ -191,6 +208,8 @@ struct mmci_host {
spinlock_t lock;
unsigned int mclk;
+ /* cached value of requested clk in set_ios */
+ unsigned int clock_cache;
unsigned int cclk;
u32 pwr_reg;
u32 pwr_reg_add;
@@ -210,6 +229,7 @@ struct mmci_host {
/* pio stuff */
struct sg_mapping_iter sg_miter;
unsigned int size;
+ int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain);
#ifdef CONFIG_DMA_ENGINE
/* DMA stuff */
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 74924a04026..b4b1efbf6c1 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -13,7 +13,6 @@
* warranty of any kind, whether express or implied.
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index babfea03ba8..140885a5a4e 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -86,7 +86,8 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc)
if (ret >= 0)
return ret;
- present = !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
+ present = mmc->caps & MMC_CAP_NEEDS_POLL ||
+ !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
BM_SSP_STATUS_CARD_DETECT);
if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b7b7558592..965672663ef 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -29,6 +29,7 @@
#include <linux/timer.h>
#include <linux/clk.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/of_gpio.h>
#include <linux/of_device.h>
#include <linux/omap-dmaengine.h>
@@ -36,6 +37,7 @@
#include <linux/mmc/core.h>
#include <linux/mmc/mmc.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/gpio.h>
#include <linux/regulator/consumer.h>
#include <linux/pinctrl/consumer.h>
@@ -54,6 +56,7 @@
#define OMAP_HSMMC_RSP54 0x0118
#define OMAP_HSMMC_RSP76 0x011C
#define OMAP_HSMMC_DATA 0x0120
+#define OMAP_HSMMC_PSTATE 0x0124
#define OMAP_HSMMC_HCTL 0x0128
#define OMAP_HSMMC_SYSCTL 0x012C
#define OMAP_HSMMC_STAT 0x0130
@@ -91,7 +94,10 @@
#define BCE (1 << 1)
#define FOUR_BIT (1 << 1)
#define HSPE (1 << 2)
+#define IWE (1 << 24)
#define DDR (1 << 19)
+#define CLKEXTFREE (1 << 16)
+#define CTPL (1 << 11)
#define DW8 (1 << 5)
#define OD 0x1
#define STAT_CLEAR 0xFFFFFFFF
@@ -101,11 +107,15 @@
#define SRD (1 << 26)
#define SOFTRESET (1 << 1)
+/* PSTATE */
+#define DLEV_DAT(x) (1 << (20 + (x)))
+
/* Interrupt masks for IE and ISE register */
#define CC_EN (1 << 0)
#define TC_EN (1 << 1)
#define BWR_EN (1 << 4)
#define BRR_EN (1 << 5)
+#define CIRQ_EN (1 << 8)
#define ERR_EN (1 << 15)
#define CTO_EN (1 << 16)
#define CCRC_EN (1 << 17)
@@ -140,7 +150,6 @@
#define VDD_3V0 3000000 /* 300000 uV */
#define VDD_165_195 (ffs(MMC_VDD_165_195) - 1)
-#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */
/*
* One controller can have multiple slots, like on some omap boards using
* omap.c controller driver. Luckily this is not currently done on any known
@@ -194,6 +203,7 @@ struct omap_hsmmc_host {
u32 sysctl;
u32 capa;
int irq;
+ int wake_irq;
int use_dma, dma_ch;
struct dma_chan *tx_chan;
struct dma_chan *rx_chan;
@@ -206,6 +216,9 @@ struct omap_hsmmc_host {
int req_in_progress;
unsigned long clk_rate;
unsigned int flags;
+#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */
+#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */
+#define HSMMC_WAKE_IRQ_ENABLED (1 << 2)
struct omap_hsmmc_next next_data;
struct omap_mmc_platform_data *pdata;
};
@@ -510,27 +523,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host,
struct mmc_command *cmd)
{
- unsigned int irq_mask;
+ u32 irq_mask = INT_EN_MASK;
+ unsigned long flags;
if (host->use_dma)
- irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN);
- else
- irq_mask = INT_EN_MASK;
+ irq_mask &= ~(BRR_EN | BWR_EN);
/* Disable timeout for erases */
if (cmd->opcode == MMC_ERASE)
irq_mask &= ~DTO_EN;
+ spin_lock_irqsave(&host->irq_lock, flags);
OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+ /* latch pending CIRQ, but don't signal MMC core */
+ if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+ irq_mask |= CIRQ_EN;
OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+ spin_unlock_irqrestore(&host->irq_lock, flags);
}
static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
{
- OMAP_HSMMC_WRITE(host->base, ISE, 0);
- OMAP_HSMMC_WRITE(host->base, IE, 0);
+ u32 irq_mask = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+ /* no transfer is running, but keep CIRQ on if it is enabled */
+ if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+ irq_mask |= CIRQ_EN;
+ OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+ OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ spin_unlock_irqrestore(&host->irq_lock, flags);
}
/* Calculate divisor for the given clock frequency */
@@ -667,6 +693,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
capa = VS18;
}
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ hctl |= IWE;
+
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL) | hctl);
@@ -681,7 +710,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
&& time_before(jiffies, timeout))
;
- omap_hsmmc_disable_irq(host);
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
/* Do not initialize card-specific things if the power is off */
if (host->power_mode == MMC_POWER_OFF)
@@ -1118,8 +1149,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
int status;
status = OMAP_HSMMC_READ(host->base, STAT);
- while (status & INT_EN_MASK && host->req_in_progress) {
- omap_hsmmc_do_irq(host, status);
+ while (status & (INT_EN_MASK | CIRQ_EN)) {
+ if (host->req_in_progress)
+ omap_hsmmc_do_irq(host, status);
+
+ if (status & CIRQ_EN)
+ mmc_signal_sdio_irq(host->mmc);
/* Flush posted write */
status = OMAP_HSMMC_READ(host->base, STAT);
@@ -1128,6 +1163,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
+{
+ struct omap_hsmmc_host *host = dev_id;
+
+ /* cirq is level triggered, disable to avoid infinite loop */
+ spin_lock(&host->irq_lock);
+ if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+ disable_irq_nosync(host->wake_irq);
+ host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+ }
+ spin_unlock(&host->irq_lock);
+ pm_request_resume(host->dev); /* no use counter */
+
+ return IRQ_HANDLED;
+}
+
static void set_sd_bus_power(struct omap_hsmmc_host *host)
{
unsigned long i;
@@ -1639,6 +1690,103 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
mmc_slot(host).init_card(card);
}
+static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct omap_hsmmc_host *host = mmc_priv(mmc);
+ u32 irq_mask, con;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+
+ con = OMAP_HSMMC_READ(host->base, CON);
+ irq_mask = OMAP_HSMMC_READ(host->base, ISE);
+ if (enable) {
+ host->flags |= HSMMC_SDIO_IRQ_ENABLED;
+ irq_mask |= CIRQ_EN;
+ con |= CTPL | CLKEXTFREE;
+ } else {
+ host->flags &= ~HSMMC_SDIO_IRQ_ENABLED;
+ irq_mask &= ~CIRQ_EN;
+ con &= ~(CTPL | CLKEXTFREE);
+ }
+ OMAP_HSMMC_WRITE(host->base, CON, con);
+ OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+
+ /*
+ * If enabling, piggy-back the IRQ enable on the current request;
+ * disabling always takes effect immediately.
+ */
+ if (!host->req_in_progress || !enable)
+ OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+ /* flush posted write */
+ OMAP_HSMMC_READ(host->base, IE);
+
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+}
+
+static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+ int ret;
+
+ /*
+ * For OMAPs with a wake-up path, the wake IRQ comes from pinctrl; for
+ * other OMAPs it comes from a GPIO (the DAT line remuxed to a GPIO).
+ * The wake IRQ is needed to detect an SDIO IRQ in the runtime-suspended
+ * state, with the functional clock disabled.
+ */
+ if (!host->dev->of_node || !host->wake_irq)
+ return -ENODEV;
+
+ /* Prevent auto-enabling of IRQ */
+ irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
+ ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ mmc_hostname(mmc), host);
+ if (ret) {
+ dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
+ goto err;
+ }
+
+ /*
+ * Some omaps don't have wake-up path from deeper idle states
+ * and need to remux SDIO DAT1 to GPIO for wake-up from idle.
+ */
+ if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
+ struct pinctrl *p = devm_pinctrl_get(host->dev);
+ if (!p) {
+ ret = -ENODEV;
+ goto err_free_irq;
+ }
+ if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) {
+ dev_info(host->dev, "missing default pinctrl state\n");
+ devm_pinctrl_put(p);
+ ret = -EINVAL;
+ goto err_free_irq;
+ }
+
+ if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_IDLE))) {
+ dev_info(host->dev, "missing idle pinctrl state\n");
+ devm_pinctrl_put(p);
+ ret = -EINVAL;
+ goto err_free_irq;
+ }
+ devm_pinctrl_put(p);
+ }
+
+ OMAP_HSMMC_WRITE(host->base, HCTL,
+ OMAP_HSMMC_READ(host->base, HCTL) | IWE);
+ return 0;
+
+err_free_irq:
+ devm_free_irq(host->dev, host->wake_irq, host);
+err:
+ dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
+ host->wake_irq = 0;
+ return ret;
+}
+
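Taken together with the wake handler above and the runtime-PM hooks further down, the wake IRQ follows a simple hand-off: it stays disabled while the controller is active, is armed only on entry to runtime suspend, and disarms itself (and requests a resume) as soon as it fires. A minimal sketch of that life cycle with hypothetical helper names, not the driver's exact code (the real paths also take host->irq_lock):

static void sketch_arm_wake_irq(struct omap_hsmmc_host *host)
{
	/* entering runtime suspend: let DAT1 going low wake us up */
	enable_irq(host->wake_irq);
	host->flags |= HSMMC_WAKE_IRQ_ENABLED;
}

static irqreturn_t sketch_wake_handler(int irq, void *dev_id)
{
	struct omap_hsmmc_host *host = dev_id;

	/* level-triggered, so mask it before anything else */
	disable_irq_nosync(host->wake_irq);
	host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
	pm_request_resume(host->dev);	/* resume path re-enables CIRQ */
	return IRQ_HANDLED;
}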
static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
{
u32 hctl, capa, value;
@@ -1691,7 +1839,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = {
.get_cd = omap_hsmmc_get_cd,
.get_ro = omap_hsmmc_get_ro,
.init_card = omap_hsmmc_init_card,
- /* NYET -- enable_sdio_irq */
+ .enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
};
#ifdef CONFIG_DEBUG_FS
@@ -1701,13 +1849,23 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
struct mmc_host *mmc = s->private;
struct omap_hsmmc_host *host = mmc_priv(mmc);
- seq_printf(s, "mmc%d:\n ctx_loss:\t%d\n\nregs:\n",
- mmc->index, host->context_loss);
+ seq_printf(s, "mmc%d:\n", mmc->index);
+ seq_printf(s, "sdio irq mode\t%s\n",
+ (mmc->caps & MMC_CAP_SDIO_IRQ) ? "interrupt" : "polling");
- pm_runtime_get_sync(host->dev);
+ if (mmc->caps & MMC_CAP_SDIO_IRQ) {
+ seq_printf(s, "sdio irq \t%s\n",
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED) ? "enabled"
+ : "disabled");
+ }
+ seq_printf(s, "ctx_loss:\t%d\n", host->context_loss);
+ pm_runtime_get_sync(host->dev);
+ seq_puts(s, "\nregs:\n");
seq_printf(s, "CON:\t\t0x%08x\n",
OMAP_HSMMC_READ(host->base, CON));
+ seq_printf(s, "PSTATE:\t\t0x%08x\n",
+ OMAP_HSMMC_READ(host->base, PSTATE));
seq_printf(s, "HCTL:\t\t0x%08x\n",
OMAP_HSMMC_READ(host->base, HCTL));
seq_printf(s, "SYSCTL:\t\t0x%08x\n",
@@ -1761,6 +1919,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = {
static const struct omap_mmc_of_data omap4_mmc_of_data = {
.reg_offset = 0x100,
};
+static const struct omap_mmc_of_data am33xx_mmc_of_data = {
+ .reg_offset = 0x100,
+ .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING,
+};
static const struct of_device_id omap_mmc_of_match[] = {
{
@@ -1777,6 +1939,10 @@ static const struct of_device_id omap_mmc_of_match[] = {
.compatible = "ti,omap4-hsmmc",
.data = &omap4_mmc_of_data,
},
+ {
+ .compatible = "ti,am33xx-hsmmc",
+ .data = &am33xx_mmc_of_data,
+ },
{},
};
MODULE_DEVICE_TABLE(of, omap_mmc_of_match);
@@ -1850,7 +2016,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
const struct of_device_id *match;
dma_cap_mask_t mask;
unsigned tx_req, rx_req;
- struct pinctrl *pinctrl;
const struct omap_mmc_of_data *data;
void __iomem *base;
@@ -1913,6 +2078,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, host);
+ if (pdev->dev.of_node)
+ host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1);
+
mmc->ops = &omap_hsmmc_ops;
mmc->f_min = OMAP_MMC_MIN_CLOCK;
@@ -2061,10 +2229,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
omap_hsmmc_disable_irq(host);
- pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
- if (IS_ERR(pinctrl))
- dev_warn(&pdev->dev,
- "pins are not configured from the driver\n");
+ /*
+ * For now, only support the SDIO interrupt if we have a separate
+ * wake-up interrupt configured from the device tree. This is because
+ * the wake-up interrupt is needed for the idle state and some
+ * platforms need special quirks. And we don't want to add new
+ * legacy mux platform init code callbacks any longer, as we
+ * are moving to DT-based booting anyway.
+ */
+ ret = omap_hsmmc_configure_wake_irq(host);
+ if (!ret)
+ mmc->caps |= MMC_CAP_SDIO_IRQ;
omap_hsmmc_protect_card(host);
@@ -2170,11 +2345,18 @@ static int omap_hsmmc_suspend(struct device *dev)
pm_runtime_get_sync(host->dev);
if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) {
- omap_hsmmc_disable_irq(host);
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
}
+ /* do not wake up due to sdio irq */
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+ disable_irq(host->wake_irq);
+
if (host->dbclk)
clk_disable_unprepare(host->dbclk);
@@ -2200,6 +2382,10 @@ static int omap_hsmmc_resume(struct device *dev)
omap_hsmmc_protect_card(host);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+ enable_irq(host->wake_irq);
+
pm_runtime_mark_last_busy(host->dev);
pm_runtime_put_autosuspend(host->dev);
return 0;
@@ -2215,22 +2401,77 @@ static int omap_hsmmc_resume(struct device *dev)
static int omap_hsmmc_runtime_suspend(struct device *dev)
{
struct omap_hsmmc_host *host;
+ unsigned long flags;
+ int ret = 0;
host = platform_get_drvdata(to_platform_device(dev));
omap_hsmmc_context_save(host);
dev_dbg(dev, "disabled\n");
- return 0;
+ spin_lock_irqsave(&host->irq_lock, flags);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+ /* disable sdio irq handling to prevent race */
+ OMAP_HSMMC_WRITE(host->base, ISE, 0);
+ OMAP_HSMMC_WRITE(host->base, IE, 0);
+
+ if (!(OMAP_HSMMC_READ(host->base, PSTATE) & DLEV_DAT(1))) {
+ /*
+ * DAT1 line is low, so an SDIO IRQ is pending.
+ * Race condition: the IRQ handler may already be running
+ * on another core; abort the suspend.
+ */
+ dev_dbg(dev, "pending sdio irq, abort suspend\n");
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+ OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+ pm_runtime_mark_last_busy(dev);
+ ret = -EBUSY;
+ goto abort;
+ }
+
+ pinctrl_pm_select_idle_state(dev);
+
+ WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
+ enable_irq(host->wake_irq);
+ host->flags |= HSMMC_WAKE_IRQ_ENABLED;
+ } else {
+ pinctrl_pm_select_idle_state(dev);
+ }
+
+abort:
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+ return ret;
}
static int omap_hsmmc_runtime_resume(struct device *dev)
{
struct omap_hsmmc_host *host;
+ unsigned long flags;
host = platform_get_drvdata(to_platform_device(dev));
omap_hsmmc_context_restore(host);
dev_dbg(dev, "enabled\n");
+ spin_lock_irqsave(&host->irq_lock, flags);
+ if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+ (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+ /* sdio irq flag can't change while in runtime suspend */
+ if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+ disable_irq_nosync(host->wake_irq);
+ host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+ }
+
+ pinctrl_pm_select_default_state(host->dev);
+
+ /* the IRQ is lost if the pinmux is incorrect */
+ OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+ OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+ OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+ } else {
+ pinctrl_pm_select_default_state(host->dev);
+ }
+ spin_unlock_irqrestore(&host->irq_lock, flags);
return 0;
}
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f23782683a7..e5516a22636 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -12,6 +12,7 @@
*/
#include <linux/module.h>
+#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/clk.h>
#include <linux/mmc/host.h>
@@ -27,6 +28,7 @@
#include <mach/dma.h>
#include <mach/gpio-samsung.h>
+#include <linux/platform_data/dma-s3c24xx.h>
#include <linux/platform_data/mmc-s3cmci.h>
#include "s3cmci.h"
@@ -140,10 +142,6 @@ static const int dbgmap_debug = dbg_err | dbg_debug;
dev_dbg(&host->pdev->dev, args); \
} while (0)
-static struct s3c2410_dma_client s3cmci_dma_client = {
- .name = "s3c-mci",
-};
-
static void finalize_request(struct s3cmci_host *host);
static void s3cmci_send_request(struct mmc_host *mmc);
static void s3cmci_reset(struct s3cmci_host *host);
@@ -256,25 +254,8 @@ static inline bool s3cmci_host_usedma(struct s3cmci_host *host)
{
#ifdef CONFIG_MMC_S3C_PIO
return false;
-#elif defined(CONFIG_MMC_S3C_DMA)
+#else /* CONFIG_MMC_S3C_DMA */
return true;
-#else
- return host->dodma;
-#endif
-}
-
-/**
- * s3cmci_host_canpio - return true if host has pio code available
- *
- * Return true if the driver has been compiled with the PIO support code
- * available.
- */
-static inline bool s3cmci_host_canpio(void)
-{
-#ifdef CONFIG_MMC_S3C_PIO
- return true;
-#else
- return false;
#endif
}
@@ -841,60 +822,24 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch,
- void *buf_id, int size,
- enum s3c2410_dma_buffresult result)
+static void s3cmci_dma_done_callback(void *arg)
{
- struct s3cmci_host *host = buf_id;
+ struct s3cmci_host *host = arg;
unsigned long iflags;
- u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt;
-
- mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
- mci_dsta = readl(host->base + S3C2410_SDIDSTA);
- mci_fsta = readl(host->base + S3C2410_SDIFSTA);
- mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
BUG_ON(!host->mrq);
BUG_ON(!host->mrq->data);
- BUG_ON(!host->dmatogo);
spin_lock_irqsave(&host->complete_lock, iflags);
- if (result != S3C2410_RES_OK) {
- dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x "
- "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n",
- mci_csta, mci_dsta, mci_fsta,
- mci_dcnt, result, host->dmatogo);
-
- goto fail_request;
- }
-
- host->dmatogo--;
- if (host->dmatogo) {
- dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] "
- "DCNT:[%08x] toGo:%u\n",
- size, mci_dsta, mci_dcnt, host->dmatogo);
-
- goto out;
- }
-
- dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n",
- size, mci_dsta, mci_dcnt);
+ dbg(host, dbg_dma, "DMA FINISHED\n");
host->dma_complete = 1;
host->complete_what = COMPLETION_FINALIZE;
-out:
tasklet_schedule(&host->pio_tasklet);
spin_unlock_irqrestore(&host->complete_lock, iflags);
- return;
-fail_request:
- host->mrq->data->error = -EINVAL;
- host->complete_what = COMPLETION_FINALIZE;
- clear_imask(host);
-
- goto out;
}
static void finalize_request(struct s3cmci_host *host)
@@ -966,7 +911,7 @@ static void finalize_request(struct s3cmci_host *host)
* DMA channel and the fifo to clear out any garbage. */
if (mrq->data->error != 0) {
if (s3cmci_host_usedma(host))
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+ dmaengine_terminate_all(host->dma);
if (host->is2440) {
/* Clear failure register and reset fifo. */
@@ -992,29 +937,6 @@ request_done:
mmc_request_done(host->mmc, mrq);
}
-static void s3cmci_dma_setup(struct s3cmci_host *host,
- enum dma_data_direction source)
-{
- static enum dma_data_direction last_source = -1;
- static int setup_ok;
-
- if (last_source == source)
- return;
-
- last_source = source;
-
- s3c2410_dma_devconfig(host->dma, source,
- host->mem->start + host->sdidata);
-
- if (!setup_ok) {
- s3c2410_dma_config(host->dma, 4);
- s3c2410_dma_set_buffdone_fn(host->dma,
- s3cmci_dma_done_callback);
- s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
- setup_ok = 1;
- }
-}
-
static void s3cmci_send_command(struct s3cmci_host *host,
struct mmc_command *cmd)
{
@@ -1162,43 +1084,45 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
{
- int dma_len, i;
int rw = data->flags & MMC_DATA_WRITE;
+ struct dma_async_tx_descriptor *desc;
+ struct dma_slave_config conf = {
+ .src_addr = host->mem->start + host->sdidata,
+ .dst_addr = host->mem->start + host->sdidata,
+ .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ };
BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
- s3cmci_dma_setup(host, rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
-
- dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
- rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- if (dma_len == 0)
- return -ENOMEM;
-
- host->dma_complete = 0;
- host->dmatogo = dma_len;
-
- for (i = 0; i < dma_len; i++) {
- int res;
-
- dbg(host, dbg_dma, "enqueue %i: %08x@%u\n", i,
- sg_dma_address(&data->sg[i]),
- sg_dma_len(&data->sg[i]));
+ /* Restore prescaler value */
+ writel(host->prescaler, host->base + S3C2410_SDIPRE);
- res = s3c2410_dma_enqueue(host->dma, host,
- sg_dma_address(&data->sg[i]),
- sg_dma_len(&data->sg[i]));
+ if (!rw)
+ conf.direction = DMA_DEV_TO_MEM;
+ else
+ conf.direction = DMA_MEM_TO_DEV;
- if (res) {
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
- return -EBUSY;
- }
- }
+ dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START);
+ dmaengine_slave_config(host->dma, &conf);
+ desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len,
+ conf.direction,
+ DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+ if (!desc)
+ goto unmap_exit;
+ desc->callback = s3cmci_dma_done_callback;
+ desc->callback_param = host;
+ dmaengine_submit(desc);
+ dma_async_issue_pending(host->dma);
return 0;
+
+unmap_exit:
+ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ return -ENOMEM;
}
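Stripped of the s3cmci specifics, the rewritten prepare function is the standard dmaengine slave-transfer sequence: configure the channel, prepare a scatter-gather descriptor, attach a completion callback, submit, and kick the queue. A self-contained sketch of that sequence (names are illustrative; it assumes <linux/dmaengine.h>, which this patch adds to the driver):

/* Generic dmaengine slave-transfer sketch; not the driver's exact code. */
static int sketch_start_slave_tx(struct dma_chan *chan, struct scatterlist *sgl,
				 unsigned int sg_len, dma_addr_t fifo,
				 dma_async_tx_callback done, void *ctx)
{
	struct dma_slave_config conf = {
		.direction	= DMA_MEM_TO_DEV,
		.dst_addr	= fifo,
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
	};
	struct dma_async_tx_descriptor *desc;

	dmaengine_slave_config(chan, &conf);
	desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV,
				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		return -ENOMEM;
	desc->callback = done;		/* runs when the transfer completes */
	desc->callback_param = ctx;
	dmaengine_submit(desc);
	dma_async_issue_pending(chan);	/* actually start the queued work */
	return 0;
}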
static void s3cmci_send_request(struct mmc_host *mmc)
@@ -1676,10 +1600,6 @@ static int s3cmci_probe(struct platform_device *pdev)
host->complete_what = COMPLETION_NONE;
host->pio_active = XFER_NONE;
-#ifdef CONFIG_MMC_S3C_PIODMA
- host->dodma = host->pdata->use_dma;
-#endif
-
host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!host->mem) {
dev_err(&pdev->dev,
@@ -1765,17 +1685,17 @@ static int s3cmci_probe(struct platform_device *pdev)
/* depending on the dma state, get a dma channel to use. */
if (s3cmci_host_usedma(host)) {
- host->dma = s3c2410_dma_request(DMACH_SDI, &s3cmci_dma_client,
- host);
- if (host->dma < 0) {
+ dma_cap_mask_t mask;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ host->dma = dma_request_slave_channel_compat(mask,
+ s3c24xx_dma_filter, (void *)DMACH_SDI, &pdev->dev, "rx-tx");
+ if (!host->dma) {
dev_err(&pdev->dev, "cannot get DMA channel.\n");
- if (!s3cmci_host_canpio()) {
- ret = -EBUSY;
- goto probe_free_gpio_wp;
- } else {
- dev_warn(&pdev->dev, "falling back to PIO.\n");
- host->dodma = 0;
- }
+ ret = -EBUSY;
+ goto probe_free_gpio_wp;
}
}
@@ -1787,7 +1707,7 @@ static int s3cmci_probe(struct platform_device *pdev)
goto probe_free_dma;
}
- ret = clk_enable(host->clk);
+ ret = clk_prepare_enable(host->clk);
if (ret) {
dev_err(&pdev->dev, "failed to enable clock source.\n");
goto clk_free;
@@ -1816,7 +1736,7 @@ static int s3cmci_probe(struct platform_device *pdev)
mmc->max_segs = 128;
dbg(host, dbg_debug,
- "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
+ "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%p.\n",
(host->is2440?"2440":""),
host->base, host->irq, host->irq_cd, host->dma);
@@ -1845,14 +1765,14 @@ static int s3cmci_probe(struct platform_device *pdev)
s3cmci_cpufreq_deregister(host);
free_dmabuf:
- clk_disable(host->clk);
+ clk_disable_unprepare(host->clk);
clk_free:
clk_put(host->clk);
probe_free_dma:
if (s3cmci_host_usedma(host))
- s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+ dma_release_channel(host->dma);
probe_free_gpio_wp:
if (!host->pdata->no_wprotect)
@@ -1897,7 +1817,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
s3cmci_debugfs_remove(host);
s3cmci_cpufreq_deregister(host);
mmc_remove_host(mmc);
- clk_disable(host->clk);
+ clk_disable_unprepare(host->clk);
}
static int s3cmci_remove(struct platform_device *pdev)
@@ -1914,7 +1834,7 @@ static int s3cmci_remove(struct platform_device *pdev)
tasklet_disable(&host->pio_tasklet);
if (s3cmci_host_usedma(host))
- s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+ dma_release_channel(host->dma);
free_irq(host->irq, host);
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index c76b53dbeb6..cc2e46cb5c6 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -26,7 +26,7 @@ struct s3cmci_host {
void __iomem *base;
int irq;
int irq_cd;
- int dma;
+ struct dma_chan *dma;
unsigned long clk_rate;
unsigned long clk_div;
@@ -36,8 +36,6 @@ struct s3cmci_host {
int is2440;
unsigned sdiimsk;
unsigned sdidata;
- int dodma;
- int dmatogo;
bool irq_disabled;
bool irq_enabled;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 8ce3c28cb76..8c5337002c5 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -124,9 +124,11 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
.chip = &sdhci_acpi_chip_int,
- .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET,
+ .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR,
.caps2 = MMC_CAP2_HC_ERASE_SZ,
.flags = SDHCI_ACPI_RUNTIME_PM,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
};
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 40573a58486..1a6661ed620 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -16,7 +16,6 @@
#include <linux/module.h>
#include <linux/of_device.h>
-#include <linux/regulator/consumer.h>
#include <linux/delay.h>
#include <linux/mmc/mmc.h>
#include <linux/slab.h>
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 52c42fcc284..c3a1debc928 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -103,6 +103,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
};
+static const struct sdhci_pci_fixes sdhci_intel_qrk = {
+ .quirks = SDHCI_QUIRK_NO_HISPD_BIT,
+};
+
static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
{
slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
@@ -264,7 +268,7 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
{
slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
- MMC_CAP_HW_RESET;
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR;
slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
slot->hw_reset = sdhci_pci_int_hw_reset;
return 0;
@@ -279,6 +283,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
.allow_runtime_pm = true,
.probe_slot = byt_emmc_probe_slot,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
};
static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -753,6 +758,14 @@ static const struct pci_device_id pci_ids[] = {
{
.vendor = PCI_VENDOR_ID_INTEL,
+ .device = PCI_DEVICE_ID_INTEL_QRK_SD,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_intel_qrk,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_INTEL,
.device = PCI_DEVICE_ID_INTEL_MRST_SD0,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
@@ -1130,18 +1143,13 @@ static int sdhci_pci_suspend(struct device *dev)
goto err_pci_suspend;
}
- pci_save_state(pdev);
if (pm_flags & MMC_PM_KEEP_POWER) {
- if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) {
- pci_pme_active(pdev, true);
- pci_enable_wake(pdev, PCI_D3hot, 1);
- }
- pci_set_power_state(pdev, PCI_D3hot);
- } else {
- pci_enable_wake(pdev, PCI_D3hot, 0);
- pci_disable_device(pdev);
- pci_set_power_state(pdev, PCI_D3hot);
- }
+ if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+ device_init_wakeup(dev, true);
+ else
+ device_init_wakeup(dev, false);
+ } else
+ device_init_wakeup(dev, false);
return 0;
@@ -1162,12 +1170,6 @@ static int sdhci_pci_resume(struct device *dev)
if (!chip)
return 0;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
-
if (chip->fixes && chip->fixes->resume) {
ret = chip->fixes->resume(chip);
if (ret)
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 6d718719659..c101477ef3b 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -17,6 +17,7 @@
#define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb
#define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5
#define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6
+#define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7
/*
* PCI registers
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index f4f12894756..6f842fb8e6b 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -288,15 +288,13 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
int ret;
struct clk *clk;
- pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
+ pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL);
if (!pxa)
return -ENOMEM;
host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0);
- if (IS_ERR(host)) {
- kfree(pxa);
+ if (IS_ERR(host))
return PTR_ERR(host);
- }
if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
@@ -308,7 +306,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
pltfm_host = sdhci_priv(host);
pltfm_host->priv = pxa;
- clk = clk_get(dev, NULL);
+ clk = devm_clk_get(dev, NULL);
if (IS_ERR(clk)) {
dev_err(dev, "failed to get io clock\n");
ret = PTR_ERR(clk);
@@ -389,11 +387,9 @@ err_add_host:
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(clk);
- clk_put(clk);
err_clk_get:
err_mbus_win:
sdhci_pltfm_free(pdev);
- kfree(pxa);
return ret;
}
@@ -401,17 +397,14 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct sdhci_pxa *pxa = pltfm_host->priv;
pm_runtime_get_sync(&pdev->dev);
sdhci_remove_host(host, 1);
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(pltfm_host->clk);
- clk_put(pltfm_host->clk);
sdhci_pltfm_free(pdev);
- kfree(pxa);
return 0;
}
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
new file mode 100644
index 00000000000..328f348c724
--- /dev/null
+++ b/drivers/mmc/host/sdhci-st.c
@@ -0,0 +1,176 @@
+/*
+ * Support for SDHCI on STMicroelectronics SoCs
+ *
+ * Copyright (C) 2014 STMicroelectronics Ltd
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ * Contributors: Peter Griffin <peter.griffin@linaro.org>
+ *
+ * Based on sdhci-cns3xxx.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mmc/host.h>
+
+#include "sdhci-pltfm.h"
+
+static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
+{
+ u32 ret;
+
+ switch (reg) {
+ case SDHCI_CAPABILITIES:
+ ret = readl_relaxed(host->ioaddr + reg);
+ /* Support 3.3V and 1.8V */
+ ret &= ~SDHCI_CAN_VDD_300;
+ break;
+ default:
+ ret = readl_relaxed(host->ioaddr + reg);
+ }
+ return ret;
+}
+
+static const struct sdhci_ops sdhci_st_ops = {
+ .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .read_l = sdhci_st_readl,
+ .reset = sdhci_reset,
+};
+
+static const struct sdhci_pltfm_data sdhci_st_pdata = {
+ .ops = &sdhci_st_ops,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+};
+
+
+static int sdhci_st_probe(struct platform_device *pdev)
+{
+ struct sdhci_host *host;
+ struct sdhci_pltfm_host *pltfm_host;
+ struct clk *clk;
+ int ret = 0;
+ u16 host_version;
+
+ clk = devm_clk_get(&pdev->dev, "mmc");
+ if (IS_ERR(clk)) {
+ dev_err(&pdev->dev, "Peripheral clk not found\n");
+ return PTR_ERR(clk);
+ }
+
+ host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
+ if (IS_ERR(host)) {
+ dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
+ return PTR_ERR(host);
+ }
+
+ ret = mmc_of_parse(host->mmc);
+
+ if (ret) {
+ dev_err(&pdev->dev, "Failed mmc_of_parse\n");
+ return ret;
+ }
+
+ clk_prepare_enable(clk);
+
+ pltfm_host = sdhci_priv(host);
+ pltfm_host->clk = clk;
+
+ ret = sdhci_add_host(host);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed sdhci_add_host\n");
+ goto err_out;
+ }
+
+ platform_set_drvdata(pdev, host);
+
+ host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
+
+ dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n",
+ ((host_version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT),
+ ((host_version & SDHCI_VENDOR_VER_MASK) >>
+ SDHCI_VENDOR_VER_SHIFT));
+
+ return 0;
+
+err_out:
+ clk_disable_unprepare(clk);
+ sdhci_pltfm_free(pdev);
+
+ return ret;
+}
+
+static int sdhci_st_remove(struct platform_device *pdev)
+{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ clk_disable_unprepare(pltfm_host->clk);
+
+ return sdhci_pltfm_unregister(pdev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sdhci_st_suspend(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ int ret = sdhci_suspend_host(host);
+
+ if (ret)
+ goto out;
+
+ clk_disable_unprepare(pltfm_host->clk);
+out:
+ return ret;
+}
+
+static int sdhci_st_resume(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ clk_prepare_enable(pltfm_host->clk);
+
+ return sdhci_resume_host(host);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sdhci_st_pmops, sdhci_st_suspend, sdhci_st_resume);
+
+static const struct of_device_id st_sdhci_match[] = {
+ { .compatible = "st,sdhci" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, st_sdhci_match);
+
+static struct platform_driver sdhci_st_driver = {
+ .probe = sdhci_st_probe,
+ .remove = sdhci_st_remove,
+ .driver = {
+ .name = "sdhci-st",
+ .pm = &sdhci_st_pmops,
+ .of_match_table = of_match_ptr(st_sdhci_match),
+ },
+};
+
+module_platform_driver(sdhci_st_driver);
+
+MODULE_DESCRIPTION("SDHCI driver for STMicroelectronics SoCs");
+MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:st-sdhci");
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index d93a063a36f..33100d10d17 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -26,8 +26,6 @@
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>
-#include <asm/gpio.h>
-
#include "sdhci-pltfm.h"
/* Tegra SDHOST controller vendor register definitions */
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 47055f3f01b..37b2a9ae52e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1223,8 +1223,16 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock);
static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
unsigned short vdd)
{
+ struct mmc_host *mmc = host->mmc;
u8 pwr = 0;
+ if (!IS_ERR(mmc->supply.vmmc)) {
+ spin_unlock_irq(&host->lock);
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+ spin_lock_irq(&host->lock);
+ return;
+ }
+
if (mode != MMC_POWER_OFF) {
switch (1 << vdd) {
case MMC_VDD_165_195:
@@ -1283,12 +1291,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
mdelay(10);
}
-
- if (host->vmmc) {
- spin_unlock_irq(&host->lock);
- mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
- spin_lock_irq(&host->lock);
- }
}
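The supply handling now goes through the MMC core's regulator helpers instead of the driver-private vmmc/vqmmc pointers removed further down. A minimal sketch of that pattern, assuming only the core API used in this patch (mmc_regulator_get_supply() filling mmc->supply, mmc_regulator_set_ocr() driving it):

/* Sketch only: core-managed vmmc, as used by the reworked sdhci_set_power(). */
static int sketch_setup_vmmc(struct mmc_host *mmc)
{
	if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
		return -EPROBE_DEFER;	/* regulator not ready yet */

	if (!IS_ERR(mmc->supply.vmmc))	/* present: let the core switch it */
		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, mmc->ios.vdd);

	return 0;
}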
/*****************************************************************************\
@@ -1440,13 +1442,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
{
unsigned long flags;
u8 ctrl;
+ struct mmc_host *mmc = host->mmc;
spin_lock_irqsave(&host->lock, flags);
if (host->flags & SDHCI_DEVICE_DEAD) {
spin_unlock_irqrestore(&host->lock, flags);
- if (host->vmmc && ios->power_mode == MMC_POWER_OFF)
- mmc_regulator_set_ocr(host->mmc, host->vmmc, 0);
+ if (!IS_ERR(mmc->supply.vmmc) &&
+ ios->power_mode == MMC_POWER_OFF)
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
return;
}
@@ -1530,7 +1534,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
host->ops->set_clock(host, host->clock);
}
-
/* Reset SD Clock Enable */
clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
clk &= ~SDHCI_CLOCK_CARD_EN;
@@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
struct mmc_ios *ios)
{
+ struct mmc_host *mmc = host->mmc;
u16 ctrl;
int ret;
@@ -1725,11 +1729,12 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
ctrl &= ~SDHCI_CTRL_VDD_180;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000);
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000,
+ 3600000);
if (ret) {
pr_warning("%s: Switching to 3.3V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -1742,16 +1747,16 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
return 0;
pr_warning("%s: 3.3V regulator output did not became stable\n",
- mmc_hostname(host->mmc));
+ mmc_hostname(mmc));
return -EAGAIN;
case MMC_SIGNAL_VOLTAGE_180:
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc,
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc,
1700000, 1950000);
if (ret) {
pr_warning("%s: Switching to 1.8V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -1763,24 +1768,22 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
ctrl |= SDHCI_CTRL_VDD_180;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- /* Wait for 5ms */
- usleep_range(5000, 5500);
-
/* 1.8V regulator output should be stable within 5 ms */
ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
if (ctrl & SDHCI_CTRL_VDD_180)
return 0;
pr_warning("%s: 1.8V regulator output did not became stable\n",
- mmc_hostname(host->mmc));
+ mmc_hostname(mmc));
return -EAGAIN;
case MMC_SIGNAL_VOLTAGE_120:
- if (host->vqmmc) {
- ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000);
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000,
+ 1300000);
if (ret) {
pr_warning("%s: Switching to 1.2V signalling voltage "
- " failed\n", mmc_hostname(host->mmc));
+ " failed\n", mmc_hostname(mmc));
return -EIO;
}
}
@@ -2643,7 +2646,6 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
int sdhci_runtime_suspend_host(struct sdhci_host *host)
{
unsigned long flags;
- int ret = 0;
/* Disable tuning since we are suspending */
if (host->flags & SDHCI_USING_RETUNING_TIMER) {
@@ -2663,14 +2665,14 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
host->runtime_suspended = true;
spin_unlock_irqrestore(&host->lock, flags);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host);
int sdhci_runtime_resume_host(struct sdhci_host *host)
{
unsigned long flags;
- int ret = 0, host_flags = host->flags;
+ int host_flags = host->flags;
if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
if (host->ops->enable_dma)
@@ -2709,7 +2711,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
spin_unlock_irqrestore(&host->lock, flags);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host);
@@ -2820,12 +2822,12 @@ int sdhci_add_host(struct sdhci_host *host)
* (128) and potentially one alignment transfer for
* each of those entries.
*/
- host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc),
+ host->adma_desc = dma_alloc_coherent(mmc_dev(mmc),
ADMA_SIZE, &host->adma_addr,
GFP_KERNEL);
host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
if (!host->adma_desc || !host->align_buffer) {
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
pr_warning("%s: Unable to allocate ADMA "
@@ -2838,7 +2840,7 @@ int sdhci_add_host(struct sdhci_host *host)
pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_ADMA;
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
host->adma_desc = NULL;
@@ -2853,7 +2855,7 @@ int sdhci_add_host(struct sdhci_host *host)
*/
if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) {
host->dma_mask = DMA_BIT_MASK(64);
- mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
+ mmc_dev(mmc)->dma_mask = &host->dma_mask;
}
if (host->version >= SDHCI_SPEC_300)
@@ -2959,28 +2961,25 @@ int sdhci_add_host(struct sdhci_host *host)
mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
- !(host->mmc->caps & MMC_CAP_NONREMOVABLE))
+ !(mmc->caps & MMC_CAP_NONREMOVABLE))
mmc->caps |= MMC_CAP_NEEDS_POLL;
+ /* If there are external regulators, get them */
+ if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
- host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc");
- if (IS_ERR_OR_NULL(host->vqmmc)) {
- if (PTR_ERR(host->vqmmc) < 0) {
- pr_info("%s: no vqmmc regulator found\n",
- mmc_hostname(mmc));
- host->vqmmc = NULL;
- }
- } else {
- ret = regulator_enable(host->vqmmc);
- if (!regulator_is_supported_voltage(host->vqmmc, 1700000,
- 1950000))
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = regulator_enable(mmc->supply.vqmmc);
+ if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000,
+ 1950000))
caps[1] &= ~(SDHCI_SUPPORT_SDR104 |
SDHCI_SUPPORT_SDR50 |
SDHCI_SUPPORT_DDR50);
if (ret) {
pr_warn("%s: Failed to enable vqmmc regulator: %d\n",
mmc_hostname(mmc), ret);
- host->vqmmc = NULL;
+ mmc->supply.vqmmc = NULL;
}
}
@@ -3041,34 +3040,6 @@ int sdhci_add_host(struct sdhci_host *host)
ocr_avail = 0;
- host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc");
- if (IS_ERR_OR_NULL(host->vmmc)) {
- if (PTR_ERR(host->vmmc) < 0) {
- pr_info("%s: no vmmc regulator found\n",
- mmc_hostname(mmc));
- host->vmmc = NULL;
- }
- }
-
-#ifdef CONFIG_REGULATOR
- /*
- * Voltage range check makes sense only if regulator reports
- * any voltage value.
- */
- if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) {
- ret = regulator_is_supported_voltage(host->vmmc, 2700000,
- 3600000);
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330)))
- caps[0] &= ~SDHCI_CAN_VDD_330;
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300)))
- caps[0] &= ~SDHCI_CAN_VDD_300;
- ret = regulator_is_supported_voltage(host->vmmc, 1700000,
- 1950000);
- if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180)))
- caps[0] &= ~SDHCI_CAN_VDD_180;
- }
-#endif /* CONFIG_REGULATOR */
-
/*
* According to SD Host Controller spec v3.00, if the Host System
* can afford more than 150mA, Host Driver should set XPC to 1. Also
@@ -3077,8 +3048,8 @@ int sdhci_add_host(struct sdhci_host *host)
* value.
*/
max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
- if (!max_current_caps && host->vmmc) {
- u32 curr = regulator_get_current_limit(host->vmmc);
+ if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
+ u32 curr = regulator_get_current_limit(mmc->supply.vmmc);
if (curr > 0) {
/* convert to SDHCI_MAX_CURRENT format */
@@ -3118,8 +3089,12 @@ int sdhci_add_host(struct sdhci_host *host)
SDHCI_MAX_CURRENT_MULTIPLIER;
}
+ /* If OCR set by external regulators, use it instead */
+ if (mmc->ocr_avail)
+ ocr_avail = mmc->ocr_avail;
+
if (host->ocr_mask)
- ocr_avail = host->ocr_mask;
+ ocr_avail &= host->ocr_mask;
mmc->ocr_avail = ocr_avail;
mmc->ocr_avail_sdio = ocr_avail;
@@ -3273,6 +3248,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host);
void sdhci_remove_host(struct sdhci_host *host, int dead)
{
+ struct mmc_host *mmc = host->mmc;
unsigned long flags;
if (dead) {
@@ -3282,7 +3258,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
if (host->mrq) {
pr_err("%s: Controller removed during "
- " transfer!\n", mmc_hostname(host->mmc));
+ " transfer!\n", mmc_hostname(mmc));
host->mrq->cmd->error = -ENOMEDIUM;
tasklet_schedule(&host->finish_tasklet);
@@ -3293,7 +3269,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
sdhci_disable_card_detection(host);
- mmc_remove_host(host->mmc);
+ mmc_remove_host(mmc);
#ifdef SDHCI_USE_LEDS_CLASS
led_classdev_unregister(&host->led);
@@ -3310,18 +3286,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
tasklet_kill(&host->finish_tasklet);
- if (host->vmmc) {
- regulator_disable(host->vmmc);
- regulator_put(host->vmmc);
- }
+ if (!IS_ERR(mmc->supply.vmmc))
+ regulator_disable(mmc->supply.vmmc);
- if (host->vqmmc) {
- regulator_disable(host->vqmmc);
- regulator_put(host->vqmmc);
- }
+ if (!IS_ERR(mmc->supply.vqmmc))
+ regulator_disable(mmc->supply.vqmmc);
if (host->adma_desc)
- dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 656fbba4c42..d11708c815d 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -386,7 +386,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
struct sh_mmcif_plat_data *pdata,
enum dma_transfer_direction direction)
{
- struct dma_slave_config cfg;
+ struct dma_slave_config cfg = { 0, };
struct dma_chan *chan;
unsigned int slave_id;
struct resource *res;
@@ -417,8 +417,15 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
/* In the OF case the driver will get the slave ID from the DT */
cfg.slave_id = slave_id;
cfg.direction = direction;
- cfg.dst_addr = res->start + MMCIF_CE_DATA;
- cfg.src_addr = 0;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ cfg.src_addr = res->start + MMCIF_CE_DATA;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ } else {
+ cfg.dst_addr = res->start + MMCIF_CE_DATA;
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ }
+
ret = dmaengine_slave_config(chan, &cfg);
if (ret < 0) {
dma_release_channel(chan);
@@ -1378,26 +1385,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Get irq error\n");
return -ENXIO;
}
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "platform_get_resource error.\n");
- return -ENXIO;
- }
- reg = ioremap(res->start, resource_size(res));
- if (!reg) {
- dev_err(&pdev->dev, "ioremap error.\n");
- return -ENOMEM;
- }
+ reg = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(reg))
+ return PTR_ERR(reg);
mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev);
- if (!mmc) {
- ret = -ENOMEM;
- goto ealloch;
- }
+ if (!mmc)
+ return -ENOMEM;
ret = mmc_of_parse(mmc);
if (ret < 0)
- goto eofparse;
+ goto err_host;
host = mmc_priv(mmc);
host->mmc = mmc;
@@ -1427,19 +1427,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
pm_runtime_enable(&pdev->dev);
host->power = false;
- host->hclk = clk_get(&pdev->dev, NULL);
+ host->hclk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(host->hclk)) {
ret = PTR_ERR(host->hclk);
dev_err(&pdev->dev, "cannot get clock: %d\n", ret);
- goto eclkget;
+ goto err_pm;
}
ret = sh_mmcif_clk_update(host);
if (ret < 0)
- goto eclkupdate;
+ goto err_pm;
ret = pm_runtime_resume(&pdev->dev);
if (ret < 0)
- goto eresume;
+ goto err_clk;
INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work);
@@ -1447,65 +1447,55 @@ static int sh_mmcif_probe(struct platform_device *pdev)
sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error";
- ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, name, host);
+ ret = devm_request_threaded_irq(&pdev->dev, irq[0], sh_mmcif_intr,
+ sh_mmcif_irqt, 0, name, host);
if (ret) {
dev_err(&pdev->dev, "request_irq error (%s)\n", name);
- goto ereqirq0;
+ goto err_clk;
}
if (irq[1] >= 0) {
- ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt,
- 0, "sh_mmc:int", host);
+ ret = devm_request_threaded_irq(&pdev->dev, irq[1],
+ sh_mmcif_intr, sh_mmcif_irqt,
+ 0, "sh_mmc:int", host);
if (ret) {
dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
- goto ereqirq1;
+ goto err_clk;
}
}
if (pd && pd->use_cd_gpio) {
ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0);
if (ret < 0)
- goto erqcd;
+ goto err_clk;
}
mutex_init(&host->thread_lock);
- clk_disable_unprepare(host->hclk);
ret = mmc_add_host(mmc);
if (ret < 0)
- goto emmcaddh;
+ goto err_clk;
dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
- dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
- dev_dbg(&pdev->dev, "chip ver H'%04x\n",
- sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff);
+ dev_info(&pdev->dev, "Chip version 0x%04x, clock rate %luMHz\n",
+ sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
+ clk_get_rate(host->hclk) / 1000000UL);
+
+ clk_disable_unprepare(host->hclk);
return ret;
-emmcaddh:
-erqcd:
- if (irq[1] >= 0)
- free_irq(irq[1], host);
-ereqirq1:
- free_irq(irq[0], host);
-ereqirq0:
- pm_runtime_suspend(&pdev->dev);
-eresume:
+err_clk:
clk_disable_unprepare(host->hclk);
-eclkupdate:
- clk_put(host->hclk);
-eclkget:
+err_pm:
pm_runtime_disable(&pdev->dev);
-eofparse:
+err_host:
mmc_free_host(mmc);
-ealloch:
- iounmap(reg);
return ret;
}
static int sh_mmcif_remove(struct platform_device *pdev)
{
struct sh_mmcif_host *host = platform_get_drvdata(pdev);
- int irq[2];
host->dying = true;
clk_prepare_enable(host->hclk);
@@ -1523,16 +1513,6 @@ static int sh_mmcif_remove(struct platform_device *pdev)
*/
cancel_delayed_work_sync(&host->timeout_work);
- if (host->addr)
- iounmap(host->addr);
-
- irq[0] = platform_get_irq(pdev, 0);
- irq[1] = platform_get_irq(pdev, 1);
-
- free_irq(irq[0], host);
- if (irq[1] >= 0)
- free_irq(irq[1], host);
-
clk_disable_unprepare(host->hclk);
mmc_free_host(host->mmc);
pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 03e7b280cb4..eb8f1d5c34b 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -294,6 +294,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
cfg.slave_id = pdata->dma->slave_id_tx;
cfg.direction = DMA_MEM_TO_DEV;
cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift);
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
cfg.src_addr = 0;
ret = dmaengine_slave_config(host->chan_tx, &cfg);
if (ret < 0)
@@ -312,6 +313,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
cfg.slave_id = pdata->dma->slave_id_rx;
cfg.direction = DMA_DEV_TO_MEM;
cfg.src_addr = cfg.dst_addr;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
cfg.dst_addr = 0;
ret = dmaengine_slave_config(host->chan_rx, &cfg);
if (ret < 0)
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 282891a8e45..54181b4f6e9 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -72,7 +72,6 @@
#define BM_SPI_CS 0x20
#define BM_SD_POWER 0x40
#define BM_SOFT_RESET 0x80
-#define BM_ONEBIT_MASK 0xFD
/* SDMMC_BLKLEN bit fields */
#define BLKL_CRCERR_ABORT 0x0800
@@ -120,6 +119,8 @@
#define STS2_DATARSP_BUSY 0x20
#define STS2_DIS_FORCECLK 0x80
+/* SDMMC_EXTCTRL bit fields */
+#define EXT_EIGHTBIT 0x04
/* MMC/SD DMA Controller Registers */
#define SDDMA_GCR 0x100
@@ -672,7 +673,7 @@ static void wmt_mci_request(struct mmc_host *mmc, struct mmc_request *req)
static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct wmt_mci_priv *priv;
- u32 reg_tmp;
+ u32 busmode, extctrl;
priv = mmc_priv(mmc);
@@ -687,28 +688,26 @@ static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (ios->clock != 0)
clk_set_rate(priv->clk_sdmmc, ios->clock);
+ busmode = readb(priv->sdmmc_base + SDMMC_BUSMODE);
+ extctrl = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
+
+ busmode &= ~(BM_EIGHTBIT_MODE | BM_FOURBIT_MODE);
+ extctrl &= ~EXT_EIGHTBIT;
+
switch (ios->bus_width) {
case MMC_BUS_WIDTH_8:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp | 0x04, priv->sdmmc_base + SDMMC_EXTCTRL);
+ busmode |= BM_EIGHTBIT_MODE;
+ extctrl |= EXT_EIGHTBIT;
break;
case MMC_BUS_WIDTH_4:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
- writeb(reg_tmp | BM_FOURBIT_MODE, priv->sdmmc_base +
- SDMMC_BUSMODE);
-
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
+ busmode |= BM_FOURBIT_MODE;
break;
case MMC_BUS_WIDTH_1:
- reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
- writeb(reg_tmp & BM_ONEBIT_MASK, priv->sdmmc_base +
- SDMMC_BUSMODE);
-
- reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
- writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
break;
}
+
+ writeb(busmode, priv->sdmmc_base + SDMMC_BUSMODE);
+ writeb(extctrl, priv->sdmmc_base + SDMMC_EXTCTRL);
}
static int wmt_mci_get_ro(struct mmc_host *mmc)
@@ -830,7 +829,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
goto fail3;
}
- ret = request_irq(dma_irq, wmt_mci_dma_isr, 32, "sdmmc", priv);
+ ret = request_irq(dma_irq, wmt_mci_dma_isr, 0, "sdmmc", priv);
if (ret) {
dev_err(&pdev->dev, "Register DMA IRQ fail\n");
goto fail4;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index af7c40ac145..e1a8f4e1998 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -581,7 +581,11 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
struct xgene_enet_desc_ring *ring;
struct xgene_enet_pdata *pdata = netdev_priv(ndev);
struct device *dev = ndev_to_dev(ndev);
- u32 size;
+ int size;
+
+ size = xgene_enet_get_ring_size(dev, cfgsize);
+ if (size < 0)
+ return NULL;
ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring),
GFP_KERNEL);
@@ -593,7 +597,6 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
ring->cfgsize = cfgsize;
ring->id = ring_id;
- size = xgene_enet_get_ring_size(dev, cfgsize);
ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma,
GFP_KERNEL);
if (!ring->desc_addr) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a3dd5dc64f4..4296b3d26f0 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14093,8 +14093,9 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
spin_lock_bh(&tp->lock);
if (!tp->hw_stats) {
+ *stats = tp->net_stats_prev;
spin_unlock_bh(&tp->lock);
- return &tp->net_stats_prev;
+ return stats;
}
tg3_get_nstats(tp, stats);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 811f1351db7..43e08d0bc3d 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -897,5 +897,6 @@ void be_roce_dev_remove(struct be_adapter *);
*/
void be_roce_dev_open(struct be_adapter *);
void be_roce_dev_close(struct be_adapter *);
+void be_roce_dev_shutdown(struct be_adapter *);
#endif /* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index db4ff14ff18..9cdeda54674 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5014,6 +5014,7 @@ static void be_shutdown(struct pci_dev *pdev)
if (!adapter)
return;
+ be_roce_dev_shutdown(adapter);
cancel_delayed_work_sync(&adapter->work);
cancel_delayed_work_sync(&adapter->func_recovery_work);
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 5bf16603a3e..ef4672dc735 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter)
{
if (ocrdma_drv && adapter->ocrdma_dev &&
ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0);
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_UP);
}
void be_roce_dev_open(struct be_adapter *adapter)
@@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter)
{
if (ocrdma_drv && adapter->ocrdma_dev &&
ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1);
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_DOWN);
}
void be_roce_dev_close(struct be_adapter *adapter)
@@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter)
}
}
+void be_roce_dev_shutdown(struct be_adapter *adapter)
+{
+ if (be_roce_supported(adapter)) {
+ mutex_lock(&be_adapter_list_lock);
+ if (ocrdma_drv && adapter->ocrdma_dev &&
+ ocrdma_drv->state_change_handler)
+ ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+ BE_DEV_SHUTDOWN);
+ mutex_unlock(&be_adapter_list_lock);
+ }
+}
+
int be_roce_register_driver(struct ocrdma_driver *drv)
{
struct be_adapter *dev;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index a3d9e96c18e..e6f7eb1a7d8 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -62,7 +62,8 @@ struct ocrdma_driver {
enum {
BE_DEV_UP = 0,
- BE_DEV_DOWN = 1
+ BE_DEV_DOWN = 1,
+ BE_DEV_SHUTDOWN = 2
};
/* APIs for RoCE driver to register callback handlers,
diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile
index 775d9969b5c..cd473e29524 100644
--- a/drivers/net/ethernet/ibm/ehea/Makefile
+++ b/drivers/net/ethernet/ibm/ehea/Makefile
@@ -1,6 +1,6 @@
#
# Makefile for the eHEA ethernet device driver for IBM eServer System p
#
-ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o
+ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o
obj-$(CONFIG_EHEA) += ehea.o
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index 58856032298..06edfca1a35 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -47,7 +47,7 @@ static u8 e1000_calculate_checksum(u8 *buffer, u32 length)
* e1000_mng_enable_host_if - Checks host interface is enabled
* @hw: pointer to the HW structure
*
- * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ * Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
*
* This function checks whether the HOST IF is enabled for command operation
* and also checks whether the previous command is completed. It busy waits
@@ -78,7 +78,7 @@ static s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
}
if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
- e_dbg("Previous command timeout failed .\n");
+ e_dbg("Previous command timeout failed.\n");
return -E1000_ERR_HOST_INTERFACE_COMMAND;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 6938fc1ad87..5d01db1d789 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -33,6 +33,7 @@
#include <scsi/fc/fc_fcoe.h>
#include <scsi/libfc.h>
#include <scsi/libfcoe.h>
+#include <uapi/linux/dcbnl.h>
#include "i40e.h"
#include "i40e_fcoe.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 51bc03072ed..871474f6fe6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4415,13 +4415,13 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
switch (vsi->back->hw.phy.link_info.link_speed) {
case I40E_LINK_SPEED_40GB:
- strncpy(speed, "40 Gbps", SPEED_SIZE);
+ strlcpy(speed, "40 Gbps", SPEED_SIZE);
break;
case I40E_LINK_SPEED_10GB:
- strncpy(speed, "10 Gbps", SPEED_SIZE);
+ strlcpy(speed, "10 Gbps", SPEED_SIZE);
break;
case I40E_LINK_SPEED_1GB:
- strncpy(speed, "1000 Mbps", SPEED_SIZE);
+ strlcpy(speed, "1000 Mbps", SPEED_SIZE);
break;
default:
break;
@@ -4429,16 +4429,16 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
switch (vsi->back->hw.fc.current_mode) {
case I40E_FC_FULL:
- strncpy(fc, "RX/TX", FC_SIZE);
+ strlcpy(fc, "RX/TX", FC_SIZE);
break;
case I40E_FC_TX_PAUSE:
- strncpy(fc, "TX", FC_SIZE);
+ strlcpy(fc, "TX", FC_SIZE);
break;
case I40E_FC_RX_PAUSE:
- strncpy(fc, "RX", FC_SIZE);
+ strlcpy(fc, "RX", FC_SIZE);
break;
default:
- strncpy(fc, "None", FC_SIZE);
+ strlcpy(fc, "None", FC_SIZE);
break;
}
@@ -5839,7 +5839,7 @@ static void i40e_send_version(struct i40e_pf *pf)
dv.minor_version = DRV_VERSION_MINOR;
dv.build_version = DRV_VERSION_BUILD;
dv.subbuild_version = 0;
- strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
+ strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
}
@@ -6293,7 +6293,7 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
if (alloc_qvectors) {
/* allocate memory for q_vector pointers */
- size = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors;
+ size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
vsi->q_vectors = kzalloc(size, GFP_KERNEL);
if (!vsi->q_vectors) {
ret = -ENOMEM;
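For context on the strncpy()->strlcpy() conversions above: strncpy() does not NUL-terminate the destination when the source string is at least as long as the buffer, while strlcpy() always terminates it, truncating if necessary. A minimal sketch (the 8-byte buffer is assumed purely for illustration, not taken from the driver):

	char speed[8];

	strncpy(speed, "40 Gbps full", sizeof(speed));	/* 8 bytes copied, no terminating '\0' */
	strlcpy(speed, "40 Gbps full", sizeof(speed));	/* speed == "40 Gbps", always terminated */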
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 97bda3dffd4..25c4f9a3011 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -251,9 +251,9 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
*
* Writes a 16 bit words buffer to the Shadow RAM using the admin command.
**/
-i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
- u32 offset, u16 words, void *data,
- bool last_command)
+static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 words, void *data,
+ bool last_command)
{
i40e_status ret_code = I40E_ERR_NVM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5d940a26055..65a4a0f88ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = {
.wrapper = mlx4_MAD_IFC_wrapper
},
{
+ .opcode = MLX4_CMD_MAD_DEMUX,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
+ {
.opcode = MLX4_CMD_QUERY_IF_STAT,
.has_inbox = false,
.has_outbox = true,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 688e1eabab2..494753e44ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[7] = "FSM (MAC anti-spoofing) support",
[8] = "Dynamic QP updates support",
[9] = "Device managed flow steering IPoIB support",
- [10] = "TCP/IP offloads/flow-steering for VXLAN support"
+ [10] = "TCP/IP offloads/flow-steering for VXLAN support",
+ [11] = "MAD DEMUX (Secure-Host) support"
};
int i;
@@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
#define QUERY_DEV_CAP_VXLAN 0x9e
+#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(dev_cap->max_counters, outbox,
QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
+ MLX4_GET(field32, outbox,
+ QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
+ if (field32 & (1 << 0))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
+
MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
if (field32 & (1 << 16))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work)
out:
mlx4_free_cmd_mailbox(dev, mailbox);
}
+
+static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10
+#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20
+#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40
+#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70
+
+ u32 set_attr_mask, getresp_attr_mask;
+ u32 trap_attr_mask, traprepress_attr_mask;
+
+ MLX4_GET(set_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
+ set_attr_mask);
+
+ MLX4_GET(getresp_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
+ getresp_attr_mask);
+
+ MLX4_GET(trap_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
+ trap_attr_mask);
+
+ MLX4_GET(traprepress_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
+ traprepress_attr_mask);
+
+ if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
+ traprepress_attr_mask)
+ return 1;
+
+ return 0;
+}
+
+int mlx4_config_mad_demux(struct mlx4_dev *dev)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int secure_host_active;
+ int err;
+
+ /* Check if mad_demux is supported */
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
+ return -ENOMEM;
+ }
+
+ /* Query mad_demux to find out which MADs are handled by internal sma */
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
+ err);
+ goto out;
+ }
+
+ secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
+
+ /* Config mad_demux to handle all MADs returned by the query above */
+ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
+ goto out;
+ }
+
+ if (secure_host_active)
+ mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 80b8c5f30e4..0158689906f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1853,6 +1853,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
goto err_mr_table_free;
}
+ err = mlx4_config_mad_demux(dev);
+ if (err) {
+ mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
+ goto err_mcg_table_free;
+ }
}
err = mlx4_init_eq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 13fbcd03c3e..b508c7887ef 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -274,6 +274,8 @@ struct mlx4_icm_table {
#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
#define MLX4_MPT_FLAG_REGION (1 << 8)
+#define MLX4_MPT_PD_MASK (0x1FFFFUL)
+#define MLX4_MPT_PD_VF_MASK (0xFE0000UL)
#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
@@ -1306,5 +1308,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev);
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
/* Returns the VF index of slave */
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
+int mlx4_config_mad_demux(struct mlx4_dev *dev);
#endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2839abb878a..7d717eccb7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry ***mpt_entry)
+{
+ int err;
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+
+ /* Make sure that at this point we have single-threaded access only */
+
+ if (mmr->enabled != MLX4_MPT_EN_HW)
+ return -EINVAL;
+
+ err = mlx4_HW2SW_MPT(dev, NULL, key);
+
+ if (err) {
+ mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+ mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+ return err;
+ }
+
+ mmr->enabled = MLX4_MPT_EN_SW;
+
+ if (!mlx4_is_mfunc(dev)) {
+ **mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ key, NULL);
+ } else {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR_OR_NULL(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
+ 0, MLX4_CMD_QUERY_MPT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ if (err)
+ goto free_mailbox;
+
+ *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
+ }
+
+ if (!(*mpt_entry) || !(**mpt_entry)) {
+ err = -ENOMEM;
+ goto free_mailbox;
+ }
+
+ return 0;
+
+free_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
+
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ int err;
+
+ if (!mlx4_is_mfunc(dev)) {
+ /* Make sure any changes to this entry are flushed */
+ wmb();
+
+ *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
+
+ /* Make sure the new status is written */
+ wmb();
+
+ err = mlx4_SYNC_TPT(dev);
+ } else {
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+
+ err = mlx4_SW2HW_MPT(dev, mailbox, key);
+ }
+
+ mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
+ if (!err)
+ mmr->enabled = MLX4_MPT_EN_HW;
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
+
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ if (mlx4_is_mfunc(dev)) {
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
+
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+ u32 pdn)
+{
+ u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
+ /* The wrapper function will put the slave's id here */
+ if (mlx4_is_mfunc(dev))
+ pd_flags &= ~MLX4_MPT_PD_VF_MASK;
+ mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) |
+ (pdn & MLX4_MPT_PD_MASK)
+ | MLX4_MPT_PD_FLAG_EN_INV);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
+
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry *mpt_entry,
+ u32 access)
+{
+ u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
+ (access & MLX4_PERM_MASK);
+
+ mpt_entry->flags = cpu_to_be32(flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
+
static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
u64 iova, u64 size, u32 access, int npages,
int page_shift, struct mlx4_mr *mr)
@@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+ u64 iova, u64 size, int npages,
+ int page_shift, struct mlx4_mpt_entry *mpt_entry)
+{
+ int err;
+
+ mpt_entry->start = cpu_to_be64(mr->iova);
+ mpt_entry->length = cpu_to_be64(mr->size);
+ mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
+ err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (err)
+ return err;
+
+ if (mr->mtt.order < 0) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+ mpt_entry->mtt_addr = 0;
+ } else {
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
+ if (mr->mtt.page_shift == 0)
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
+ }
+ mr->enabled = MLX4_MPT_EN_SW;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
+
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_cmd_mailbox *mailbox;
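The mlx4_mr_hw_{get,write,put}_mpt() and mlx4_mr_hw_change_{pd,access}() exports added above form a read-modify-write protocol on a live MPT entry. A minimal sketch of a caller, assuming single-threaded access to the MR; the function name and the bare dev/mmr arguments are illustrative (the intended consumer is an MR re-registration path):

	static int example_change_mr_pd(struct mlx4_dev *dev, struct mlx4_mr *mmr,
					u32 new_pdn)
	{
		struct mlx4_mpt_entry *mpt_entry;
		struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
		int err;

		/* HW2SW_MPT plus fetch of the entry (ICM or command mailbox) */
		err = mlx4_mr_hw_get_mpt(dev, mmr, &pmpt_entry);
		if (err)
			return err;

		err = mlx4_mr_hw_change_pd(dev, *pmpt_entry, new_pdn);
		if (!err)
			/* SW2HW_MPT (or SYNC_TPT) hands the entry back to HW */
			err = mlx4_mr_hw_write_mpt(dev, mmr, pmpt_entry);

		/* releases the command mailbox in the multi-function case */
		mlx4_mr_hw_put_mpt(dev, pmpt_entry);
		return err;
	}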
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0efc1368e5a..1089367fed2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
- if (mpt->com.from_state != RES_MPT_HW) {
+ if (mpt->com.from_state == RES_MPT_MAPPED) {
+ /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
+ * that, the VF must read the MPT. But since the MPT entry memory is not
+ * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
+ * entry contents. To guarantee that the MPT cannot be changed, the driver
+ * must perform HW2SW_MPT before this query and return the MPT entry to HW
+ * ownership following the change. The change here allows the VF to
+ * perform QUERY_MPT also when the entry is in SW ownership.
+ */
+ struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ mpt->key, NULL);
+
+ if (NULL == mpt_entry || NULL == outbox->buf) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
+
+ err = 0;
+ } else if (mpt->com.from_state == RES_MPT_HW) {
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ } else {
err = -EBUSY;
goto out;
}
- err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
out:
put_res(dev, slave, id, RES_MPT);
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 69c26f04d8c..679db026f4b 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -873,6 +873,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
return -ENOMEM;
dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+ __free_page(dmatest_page);
+ return -ENOMEM;
+ }
/* Run a small DMA test.
* The magic multipliers to the length tell the firmware
@@ -1294,6 +1298,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
int bytes, int watchdog)
{
struct page *page;
+ dma_addr_t bus;
int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
int end_offset;
@@ -1318,11 +1323,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
rx->watchdog_needed = 1;
return;
}
+
+ bus = pci_map_page(mgp->pdev, page, 0,
+ MYRI10GE_ALLOC_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ __free_pages(page, MYRI10GE_ALLOC_ORDER);
+ if (rx->fill_cnt - rx->cnt < 16)
+ rx->watchdog_needed = 1;
+ return;
+ }
+
rx->page = page;
rx->page_offset = 0;
- rx->bus = pci_map_page(mgp->pdev, page, 0,
- MYRI10GE_ALLOC_SIZE,
- PCI_DMA_FROMDEVICE);
+ rx->bus = bus;
+
}
rx->info[idx].page = rx->page;
rx->info[idx].page_offset = rx->page_offset;
@@ -2764,6 +2779,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
mb();
}
+static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
+ struct myri10ge_tx_buf *tx, int idx)
+{
+ unsigned int len;
+ int last_idx;
+
+ /* Free any DMA resources we've alloced and clear out the skb slot */
+ last_idx = (idx + 1) & tx->mask;
+ idx = tx->req & tx->mask;
+ do {
+ len = dma_unmap_len(&tx->info[idx], len);
+ if (len) {
+ if (tx->info[idx].skb != NULL)
+ pci_unmap_single(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ else
+ pci_unmap_page(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ dma_unmap_len_set(&tx->info[idx], len, 0);
+ tx->info[idx].skb = NULL;
+ }
+ idx = (idx + 1) & tx->mask;
+ } while (idx != last_idx);
+}
+
/*
* Transmit a packet. We need to split the packet so that a single
* segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2787,7 +2831,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
u32 low;
__be32 high_swapped;
unsigned int len;
- int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+ int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
@@ -2884,9 +2928,12 @@ again:
/* map the skb for DMA */
len = skb_headlen(skb);
+ bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+ goto drop;
+
idx = tx->req & tx->mask;
tx->info[idx].skb = skb;
- bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);
@@ -2985,12 +3032,16 @@ again:
break;
/* map next fragment for DMA */
- idx = (count + tx->req) & tx->mask;
frag = &skb_shinfo(skb)->frags[frag_idx];
frag_idx++;
len = skb_frag_size(frag);
bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
DMA_TO_DEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ myri10ge_unmap_tx_dma(mgp, tx, idx);
+ goto drop;
+ }
+ idx = (count + tx->req) & tx->mask;
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);
}
@@ -3021,31 +3072,8 @@ again:
return NETDEV_TX_OK;
abort_linearize:
- /* Free any DMA resources we've alloced and clear out the skb
- * slot so as to not trip up assertions, and to avoid a
- * double-free if linearizing fails */
+ myri10ge_unmap_tx_dma(mgp, tx, idx);
- last_idx = (idx + 1) & tx->mask;
- idx = tx->req & tx->mask;
- tx->info[idx].skb = NULL;
- do {
- len = dma_unmap_len(&tx->info[idx], len);
- if (len) {
- if (tx->info[idx].skb != NULL)
- pci_unmap_single(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- else
- pci_unmap_page(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- dma_unmap_len_set(&tx->info[idx], len, 0);
- tx->info[idx].skb = NULL;
- }
- idx = (idx + 1) & tx->mask;
- } while (idx != last_idx);
if (skb_is_gso(skb)) {
netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
goto drop;
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index d813bfb1a84..23c89ab5a6a 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -32,6 +32,11 @@ MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);
+/* Heuristic for the number of times to exponentially back off and
+ * retry sending an LDC trigger when EAGAIN is encountered
+ */
+#define VNET_MAX_RETRIES 10
+
/* Ordered from largest major to lowest */
static struct vio_version vnet_versions[] = {
{ .major = 1, .minor = 0 },
@@ -260,6 +265,7 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
.state = vio_dring_state,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -272,6 +278,13 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VNET_MAX_RETRIES) {
+ pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
+ port->raddr[0], port->raddr[1],
+ port->raddr[2], port->raddr[3],
+ port->raddr[4], port->raddr[5]);
+ err = -ECONNRESET;
+ }
} while (err == -EAGAIN);
return err;
@@ -475,8 +488,9 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
return 0;
}
-static void maybe_tx_wakeup(struct vnet *vp)
+static void maybe_tx_wakeup(unsigned long param)
{
+ struct vnet *vp = (struct vnet *)param;
struct net_device *dev = vp->dev;
netif_tx_lock(dev);
@@ -573,8 +587,13 @@ static void vnet_event(void *arg, int event)
break;
}
spin_unlock(&vio->lock);
+ /* Kick off a tasklet to wake the queue. We cannot call
+ * maybe_tx_wakeup directly here because we could deadlock on
+ * netif_tx_lock() with dev_watchdog()
+ */
if (unlikely(tx_wakeup && err != -ECONNRESET))
- maybe_tx_wakeup(port->vp);
+ tasklet_schedule(&port->vp->vnet_tx_wakeup);
+
local_irq_restore(flags);
}
@@ -593,6 +612,7 @@ static int __vnet_tx_trigger(struct vnet_port *port)
.end_idx = (u32) -1,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -605,6 +625,8 @@ static int __vnet_tx_trigger(struct vnet_port *port)
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VNET_MAX_RETRIES)
+ break;
} while (err == -EAGAIN);
return err;
@@ -691,7 +713,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
}
- d->hdr.ack = VIO_ACK_ENABLE;
+ /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
+ * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
+ * the protocol itself does not require it as long as the peer
+ * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
+ *
+ * An ACK for every packet in the ring is expensive as the
+ * sending of LDC messages is slow and affects performance.
+ */
+ d->hdr.ack = VIO_ACK_DISABLE;
d->size = len;
d->ncookies = port->tx_bufs[dr->prod].ncookies;
for (i = 0; i < d->ncookies; i++)
@@ -1046,6 +1076,7 @@ static struct vnet *vnet_new(const u64 *local_mac)
vp = netdev_priv(dev);
spin_lock_init(&vp->lock);
+ tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
vp->dev = dev;
INIT_LIST_HEAD(&vp->port_list);
@@ -1105,6 +1136,7 @@ static void vnet_cleanup(void)
vp = list_first_entry(&vnet_list, struct vnet, list);
list_del(&vp->list);
dev = vp->dev;
+ tasklet_kill(&vp->vnet_tx_wakeup);
/* vio_unregister_driver() should have cleaned up port_list */
BUG_ON(!list_empty(&vp->port_list));
unregister_netdev(dev);
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index d347a5bf24b..de5c2c64996 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -1,6 +1,8 @@
#ifndef _SUNVNET_H
#define _SUNVNET_H
+#include <linux/interrupt.h>
+
#define DESC_NCOOKIES(entry_size) \
((entry_size) - sizeof(struct vio_net_desc))
@@ -78,6 +80,8 @@ struct vnet {
struct list_head list;
u64 local_mac;
+
+ struct tasklet_struct vnet_tx_wakeup;
};
#endif /* _SUNVNET_H */
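Taken together, the sunvnet hunks above move the TX queue wakeup out of the LDC event path, where taking netif_tx_lock() could deadlock against dev_watchdog(), and into a tasklet, and they cap the previously unbounded EAGAIN retry loops with VNET_MAX_RETRIES. The tasklet lifecycle, condensed from the three hunks that touch it:

	/* setup, in vnet_new() */
	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);

	/* from vnet_event(), instead of calling maybe_tx_wakeup() directly */
	tasklet_schedule(&port->vp->vnet_tx_wakeup);

	/* teardown, in vnet_cleanup(), before unregister_netdev() */
	tasklet_kill(&vp->vnet_tx_wakeup);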
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 36f4459520c..fda5891835d 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1170,7 +1170,6 @@ static struct platform_driver temac_of_driver = {
.probe = temac_of_probe,
.remove = temac_of_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "xilinx_temac",
.of_match_table = temac_of_match,
},
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 30e8608ff05..c8fd94133ec 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1645,7 +1645,6 @@ static struct platform_driver axienet_of_driver = {
.probe = axienet_of_probe,
.remove = axienet_of_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "xilinx_axienet",
.of_match_table = axienet_of_match,
},
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 782bb9373cd..28dbbdc393e 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1245,7 +1245,6 @@ MODULE_DEVICE_TABLE(of, xemaclite_of_match);
static struct platform_driver xemaclite_of_driver = {
.driver = {
.name = DRIVER_NAME,
- .owner = THIS_MODULE,
.of_match_table = xemaclite_of_match,
},
.probe = xemaclite_of_probe,
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a931..6d3e2093bf7 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
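For reference, module_pci_driver() generates essentially the boilerplate deleted above; simplified from include/linux/pci.h (the expansion goes through the generic module_driver() helper):

	#define module_pci_driver(__pci_driver) \
		module_driver(__pci_driver, pci_register_driver, pci_unregister_driver)

	/* which expands to roughly */
	static int __init donauboe_pci_driver_init(void)
	{
		return pci_register_driver(&donauboe_pci_driver);
	}
	module_init(donauboe_pci_driver_init);

	static void __exit donauboe_pci_driver_exit(void)
	{
		pci_unregister_driver(&donauboe_pci_driver);
	}
	module_exit(donauboe_pci_driver_exit);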
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ef8a5c20236..60e4ca01ccb 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -45,10 +45,9 @@ struct macvlan_port {
struct sk_buff_head bc_queue;
struct work_struct bc_work;
bool passthru;
+ int count;
};
-#define MACVLAN_PORT_IS_EMPTY(port) list_empty(&port->vlans)
-
struct macvlan_skb_cb {
const struct macvlan_dev *src;
};
@@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_device *dev)
free_percpu(vlan->pcpu_stats);
- if (MACVLAN_PORT_IS_EMPTY(port))
+ port->count -= 1;
+ if (!port->count)
macvlan_port_destroy(port->dev);
}
@@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
- if (!MACVLAN_PORT_IS_EMPTY(port))
+ if (port->count)
return -EINVAL;
port->passthru = true;
eth_hw_addr_inherit(dev, lowerdev);
}
+ port->count += 1;
err = register_netdevice(dev);
if (err < 0)
goto destroy_port;
@@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
unregister_netdev:
unregister_netdevice(dev);
destroy_port:
- if (MACVLAN_PORT_IS_EMPTY(port))
+ port->count -= 1;
+ if (!port->count)
macvlan_port_destroy(lowerdev);
return err;
diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 8596aba34f9..237d0cda1bc 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -256,6 +256,7 @@ struct ar9170 {
atomic_t rx_work_urbs;
atomic_t rx_pool_urbs;
kernel_ulong_t features;
+ bool usb_ep_cmd_is_bulk;
/* firmware settings */
struct completion fw_load_wait;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index f35c7f30f9a..c9f93310c0d 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd,
goto err_free;
}
- usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev,
- AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4,
- carl9170_usb_cmd_complete, ar, 1);
+ if (ar->usb_ep_cmd_is_bulk)
+ usb_fill_bulk_urb(urb, ar->udev,
+ usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD),
+ cmd, cmd->hdr.len + 4,
+ carl9170_usb_cmd_complete, ar);
+ else
+ usb_fill_int_urb(urb, ar->udev,
+ usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD),
+ cmd, cmd->hdr.len + 4,
+ carl9170_usb_cmd_complete, ar, 1);
if (free_buf)
urb->transfer_flags |= URB_FREE_BUFFER;
@@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw,
static int carl9170_usb_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
+ struct usb_endpoint_descriptor *ep;
struct ar9170 *ar;
struct usb_device *udev;
- int err;
+ int i, err;
err = usb_reset_device(interface_to_usbdev(intf));
if (err)
@@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf,
ar->intf = intf;
ar->features = id->driver_info;
+ /* We need to remember the type of endpoint 4 because it differs
+ * between high- and full-speed configuration. The high-speed
+ * configuration specifies it as interrupt and the full-speed
+ * configuration as bulk endpoint. This information is required
+ * later when sending urbs to that endpoint.
+ */
+ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) {
+ ep = &intf->cur_altsetting->endpoint[i].desc;
+
+ if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD &&
+ usb_endpoint_dir_out(ep) &&
+ usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK)
+ ar->usb_ep_cmd_is_bulk = true;
+ }
+
usb_set_intfdata(intf, ar);
SET_IEEE80211_DEV(ar->hw, &intf->dev);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
index 535c7eb01b3..8f8b9373de9 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
@@ -1318,6 +1318,8 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings;
msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings *
sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC);
+ if (!msgbuf->flowring_dma_handle)
+ goto fail;
msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset;
msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost;
@@ -1362,6 +1364,7 @@ fail:
kfree(msgbuf->flow_map);
kfree(msgbuf->txstatus_done_map);
brcmf_msgbuf_release_pktids(msgbuf);
+ kfree(msgbuf->flowring_dma_handle);
if (msgbuf->ioctbuf)
dma_free_coherent(drvr->bus_if->dev,
BRCMF_TX_IOCTL_MAX_MSG_SIZE,
@@ -1391,6 +1394,7 @@ void brcmf_proto_msgbuf_detach(struct brcmf_pub *drvr)
BRCMF_TX_IOCTL_MAX_MSG_SIZE,
msgbuf->ioctbuf, msgbuf->ioctbuf_handle);
brcmf_msgbuf_release_pktids(msgbuf);
+ kfree(msgbuf->flowring_dma_handle);
kfree(msgbuf);
drvr->proto->pd = NULL;
}
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
index bc972c0ba5f..e5101b287e4 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
@@ -591,12 +591,13 @@ static void brcmf_pcie_handle_mb_data(struct brcmf_pciedev_info *devinfo)
}
if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE)
brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n");
- if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK)
+ if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) {
brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n");
if (waitqueue_active(&devinfo->mbdata_resp_wait)) {
devinfo->mbdata_completed = true;
wake_up(&devinfo->mbdata_resp_wait);
}
+ }
}
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index c5aa404069f..389656bd1a7 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -9853,6 +9853,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev,
strncpy(extra, "unknown", MAX_WX_STRING);
break;
}
+ extra[MAX_WX_STRING - 1] = '\0';
IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra);
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 0d6a8b768a6..7c8796584c2 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -396,7 +396,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
else
hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
- hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
+ /* TODO: enable that only for firmwares that don't crash */
+ /* hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; */
hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
/* we create the 802.11 header and zero length SSID IE. */
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f46a3..d4eb8d2e9cb 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
+ atomic_t inflight_packets;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues;
extern struct dentry *xen_netback_dbg_root;
#endif
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bfd10cb9c8d..e29e15dca86 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because the core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+ atomic_dec(&queue->inflight_packets);
+}
+
static inline void xenvif_stop_queue(struct xenvif_queue *queue)
{
struct net_device *dev = queue->vif->dev;
@@ -524,9 +541,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->rx_stalled);
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
-
return 0;
}
@@ -560,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->wq);
init_waitqueue_head(&queue->dealloc_wq);
+ atomic_set(&queue->inflight_packets, 0);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -614,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
wake_up_process(queue->task);
wake_up_process(queue->dealloc_task);
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
+
return 0;
err_rx_unbind:
@@ -642,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif)
rtnl_unlock();
}
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
- unsigned int worst_case_skb_lifetime)
-{
- int i, unmap_timeout = 0;
-
- for (i = 0; i < MAX_PENDING_REQS; ++i) {
- if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
- unmap_timeout++;
- schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > worst_case_skb_lifetime &&
- net_ratelimit())
- netdev_err(queue->vif->dev,
- "Page still granted! Index: %x\n",
- i);
- i = -1;
- }
- }
-}
-
void xenvif_disconnect(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
@@ -672,6 +671,8 @@ void xenvif_disconnect(struct xenvif *vif)
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
+ netif_napi_del(&queue->napi);
+
if (queue->task) {
del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
@@ -704,7 +705,6 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
- netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif)
@@ -712,21 +712,11 @@ void xenvif_free(struct xenvif *vif)
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- /* Here we want to avoid timeout messages if an skb can be legitimately
- * stuck somewhere else. Realistically this could be an another vif's
- * internal or QDisc queue. That another vif also has this
- * rx_drain_timeout_msecs timeout, so give it time to drain out.
- * Although if that other guest wakes up just before its timeout happens
- * and takes only one skb from QDisc, it can hold onto other skbs for a
- * longer period.
- */
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
unregister_netdev(vif->dev);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
- xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
xenvif_deinit_queue(queue);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4734472aa62..08f65996534 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
/* remove traces of mapped pages and frag_list */
skb_frag_list_init(skb);
uarg = skb_shinfo(skb)->destructor_arg;
+ /* increase inflight counter to offset decrement in callback */
+ atomic_inc(&queue->inflight_packets);
uarg->callback(uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
- skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, nskb);
kfree_skb(nskb);
return 0;
@@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
if (net_ratelimit())
netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
* skb. E.g. the __pskb_pull_tail earlier can do such thing.
*/
if (skb_shinfo(skb)->destructor_arg) {
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
@@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
queue->stats.tx_zerocopy_success++;
else
queue->stats.tx_zerocopy_fail++;
+ xenvif_skb_zerocopy_complete(queue);
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -2058,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
return 0;
}
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+ /* Dealloc thread must remain running until all inflight
+ * packets complete.
+ */
+ return kthread_should_stop() &&
+ !atomic_read(&queue->inflight_packets);
+}
+
int xenvif_dealloc_kthread(void *data)
{
struct xenvif_queue *queue = data;
- while (!kthread_should_stop()) {
+ for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
- kthread_should_stop());
- if (kthread_should_stop())
+ xenvif_dealloc_kthread_should_stop(queue));
+ if (xenvif_dealloc_kthread_should_stop(queue))
break;
xenvif_tx_dealloc_action(queue);
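The interface.c and netback.c hunks above add a small lifetime protocol around zerocopy skbs: every skb flagged SKBTX_DEV_ZEROCOPY bumps inflight_packets, the zerocopy completion drops it, and the dealloc kthread refuses to exit while the counter is non-zero. Condensed, using the helpers introduced by the patch (the local "stop" variable is only for illustration):

	/* TX side: flag the skb and account for it */
	xenvif_skb_zerocopy_prepare(queue, skb);	/* SKBTX_DEV_ZEROCOPY, inflight_packets++ */

	/* completion side, reached via xenvif_zerocopy_callback() */
	xenvif_skb_zerocopy_complete(queue);		/* inflight_packets-- */

	/* the dealloc kthread only exits once both conditions hold */
	bool stop = kthread_should_stop() &&
		    !atomic_read(&queue->inflight_packets);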
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d857b..9c47b897b6d 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
}
#define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE 32
static ssize_t
xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
struct xenvif_queue *queue =
((struct seq_file *)filp->private_data)->private;
int len;
- char write[sizeof(XENVIF_KICK_STR)];
+ char write[BUFFER_SIZE];
/* don't allow partial writes and check the length */
if (*ppos != 0)
return 0;
- if (count < sizeof(XENVIF_KICK_STR) - 1)
+ if (count >= sizeof(write))
return -ENOSPC;
len = simple_write_to_buffer(write,
- sizeof(write),
+ sizeof(write) - 1,
ppos,
buf,
count);
if (len < 0)
return len;
+ write[len] = '\0';
+
if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
xenvif_interrupt(0, (void *)queue);
else {
@@ -171,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
.write = xenvif_write_io_ring,
};
-static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+static void xenvif_debugfs_addif(struct xenvif *vif)
{
struct dentry *pfile;
- struct xenvif *vif = queue->vif;
int i;
if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -733,10 +735,11 @@ static void connect(struct backend_info *be)
be->vif->num_queues = queue_index;
goto err;
}
+ }
+
#ifdef CONFIG_DEBUG_FS
- xenvif_debugfs_addif(queue);
+ xenvif_debugfs_addif(be->vif);
#endif /* CONFIG_DEBUG_FS */
- }
/* Initialisation completed, tell core driver the number of
* active queues.
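The xenvif_write_io_ring() change above is the standard shape of a safe debugfs write handler: reject oversized writes, copy at most one byte less than the local buffer, and NUL-terminate before parsing. A generic sketch (the handler and buffer names are illustrative, not the driver's):

	static ssize_t dbg_write(struct file *filp, const char __user *buf,
				 size_t count, loff_t *ppos)
	{
		char kbuf[32];
		ssize_t len;

		if (*ppos != 0)
			return 0;
		if (count >= sizeof(kbuf))
			return -ENOSPC;

		len = simple_write_to_buffer(kbuf, sizeof(kbuf) - 1, ppos, buf, count);
		if (len < 0)
			return len;
		kbuf[len] = '\0';

		/* parse kbuf here, e.g. with strncmp()/kstrtoul() */
		return count;
	}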
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 2dcb0541012..5160c4eb73c 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -9,7 +9,8 @@ menu "Device Tree and Open Firmware support"
config OF_SELFTEST
bool "Device Tree Runtime self tests"
- depends on OF_IRQ
+ depends on OF_IRQ && OF_EARLY_FLATTREE
+ select OF_DYNAMIC
help
This option builds in test cases for the device tree infrastructure
that are executed once at boot time, and the results dumped to the
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 099b1fb00af..2b6a7b129d1 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,11 +1,13 @@
obj-y = base.o device.o platform.o
+obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
obj-$(CONFIG_OF_FLATTREE) += fdt.o
obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
obj-$(CONFIG_OF_PROMTREE) += pdt.o
obj-$(CONFIG_OF_ADDRESS) += address.o
obj-$(CONFIG_OF_IRQ) += irq.o
obj-$(CONFIG_OF_NET) += of_net.o
-obj-$(CONFIG_OF_SELFTEST) += selftest.o
+obj-$(CONFIG_OF_SELFTEST) += of_selftest.o
+of_selftest-objs := selftest.o testcase-data/testcases.dtb.o
obj-$(CONFIG_OF_MDIO) += of_mdio.o
obj-$(CONFIG_OF_PCI) += of_pci.o
obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index b9864806e9b..d8574adf0d6 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,6 +17,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/cpu.h>
#include <linux/module.h>
@@ -35,15 +36,17 @@ struct device_node *of_allnodes;
EXPORT_SYMBOL(of_allnodes);
struct device_node *of_chosen;
struct device_node *of_aliases;
-static struct device_node *of_stdout;
+struct device_node *of_stdout;
-static struct kset *of_kset;
+struct kset *of_kset;
/*
- * Used to protect the of_aliases; but also overloaded to hold off addition of
- * nodes to sysfs
+ * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
+ * This mutex must be held whenever modifications are being made to the
+ * device tree. The of_{attach,detach}_node() and
+ * of_{add,remove,update}_property() helpers make sure this happens.
*/
-DEFINE_MUTEX(of_aliases_mutex);
+DEFINE_MUTEX(of_mutex);
/* use when traversing tree through the allnext, child, sibling,
* or parent members of struct device_node.
@@ -89,79 +92,7 @@ int __weak of_node_to_nid(struct device_node *np)
}
#endif
-#if defined(CONFIG_OF_DYNAMIC)
-/**
- * of_node_get - Increment refcount of a node
- * @node: Node to inc refcount, NULL is supported to
- * simplify writing of callers
- *
- * Returns node.
- */
-struct device_node *of_node_get(struct device_node *node)
-{
- if (node)
- kobject_get(&node->kobj);
- return node;
-}
-EXPORT_SYMBOL(of_node_get);
-
-static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
-{
- return container_of(kobj, struct device_node, kobj);
-}
-
-/**
- * of_node_release - release a dynamically allocated node
- * @kref: kref element of the node to be released
- *
- * In of_node_put() this function is passed to kref_put()
- * as the destructor.
- */
-static void of_node_release(struct kobject *kobj)
-{
- struct device_node *node = kobj_to_device_node(kobj);
- struct property *prop = node->properties;
-
- /* We should never be releasing nodes that haven't been detached. */
- if (!of_node_check_flag(node, OF_DETACHED)) {
- pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
- dump_stack();
- return;
- }
-
- if (!of_node_check_flag(node, OF_DYNAMIC))
- return;
-
- while (prop) {
- struct property *next = prop->next;
- kfree(prop->name);
- kfree(prop->value);
- kfree(prop);
- prop = next;
-
- if (!prop) {
- prop = node->deadprops;
- node->deadprops = NULL;
- }
- }
- kfree(node->full_name);
- kfree(node->data);
- kfree(node);
-}
-
-/**
- * of_node_put - Decrement refcount of a node
- * @node: Node to dec refcount, NULL is supported to
- * simplify writing of callers
- *
- */
-void of_node_put(struct device_node *node)
-{
- if (node)
- kobject_put(&node->kobj);
-}
-EXPORT_SYMBOL(of_node_put);
-#else
+#ifndef CONFIG_OF_DYNAMIC
static void of_node_release(struct kobject *kobj)
{
/* Without CONFIG_OF_DYNAMIC, no nodes gets freed */
@@ -200,13 +131,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
return name;
}
-static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
+int __of_add_property_sysfs(struct device_node *np, struct property *pp)
{
int rc;
/* Important: Don't leak passwords */
bool secure = strncmp(pp->name, "security-", 9) == 0;
+ if (!of_kset || !of_node_is_attached(np))
+ return 0;
+
sysfs_bin_attr_init(&pp->attr);
pp->attr.attr.name = safe_name(&np->kobj, pp->name);
pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO;
@@ -218,12 +152,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
return rc;
}
-static int __of_node_add(struct device_node *np)
+int __of_attach_node_sysfs(struct device_node *np)
{
const char *name;
struct property *pp;
int rc;
+ if (!of_kset)
+ return 0;
+
np->kobj.kset = of_kset;
if (!np->parent) {
/* Nodes without parents are new top level trees */
@@ -245,59 +182,20 @@ static int __of_node_add(struct device_node *np)
return 0;
}
-int of_node_add(struct device_node *np)
-{
- int rc = 0;
-
- BUG_ON(!of_node_is_initialized(np));
-
- /*
- * Grab the mutex here so that in a race condition between of_init() and
- * of_node_add(), node addition will still be consistent.
- */
- mutex_lock(&of_aliases_mutex);
- if (of_kset)
- rc = __of_node_add(np);
- else
- /* This scenario may be perfectly valid, but report it anyway */
- pr_info("of_node_add(%s) before of_init()\n", np->full_name);
- mutex_unlock(&of_aliases_mutex);
- return rc;
-}
-
-#if defined(CONFIG_OF_DYNAMIC)
-static void of_node_remove(struct device_node *np)
-{
- struct property *pp;
-
- BUG_ON(!of_node_is_initialized(np));
-
- /* only remove properties if on sysfs */
- if (of_node_is_attached(np)) {
- for_each_property_of_node(np, pp)
- sysfs_remove_bin_file(&np->kobj, &pp->attr);
- kobject_del(&np->kobj);
- }
-
- /* finally remove the kobj_init ref */
- of_node_put(np);
-}
-#endif
-
static int __init of_init(void)
{
struct device_node *np;
/* Create the kset, and register existing nodes */
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
if (!of_kset) {
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
return -ENOMEM;
}
for_each_of_allnodes(np)
- __of_node_add(np);
- mutex_unlock(&of_aliases_mutex);
+ __of_attach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
/* Symlink in /proc as required by userspace ABI */
if (of_allnodes)
@@ -369,8 +267,8 @@ EXPORT_SYMBOL(of_find_all_nodes);
* Find a property with a given name for a given node
* and return the value.
*/
-static const void *__of_get_property(const struct device_node *np,
- const char *name, int *lenp)
+const void *__of_get_property(const struct device_node *np,
+ const char *name, int *lenp)
{
struct property *pp = __of_find_property(np, name, lenp);
@@ -1748,32 +1646,10 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na
}
EXPORT_SYMBOL(of_count_phandle_with_args);
-#if defined(CONFIG_OF_DYNAMIC)
-static int of_property_notify(int action, struct device_node *np,
- struct property *prop)
-{
- struct of_prop_reconfig pr;
-
- /* only call notifiers if the node is attached */
- if (!of_node_is_attached(np))
- return 0;
-
- pr.dn = np;
- pr.prop = prop;
- return of_reconfig_notify(action, &pr);
-}
-#else
-static int of_property_notify(int action, struct device_node *np,
- struct property *prop)
-{
- return 0;
-}
-#endif
-
/**
* __of_add_property - Add a property to a node without lock operations
*/
-static int __of_add_property(struct device_node *np, struct property *prop)
+int __of_add_property(struct device_node *np, struct property *prop)
{
struct property **next;
@@ -1799,22 +1675,49 @@ int of_add_property(struct device_node *np, struct property *prop)
unsigned long flags;
int rc;
- rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
rc = __of_add_property(np, prop);
raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (rc)
- return rc;
- if (of_node_is_attached(np))
+ if (!rc)
__of_add_property_sysfs(np, prop);
+ mutex_unlock(&of_mutex);
+
+ if (!rc)
+ of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL);
+
return rc;
}
+int __of_remove_property(struct device_node *np, struct property *prop)
+{
+ struct property **next;
+
+ for (next = &np->properties; *next; next = &(*next)->next) {
+ if (*next == prop)
+ break;
+ }
+ if (*next == NULL)
+ return -ENODEV;
+
+ /* found the node */
+ *next = prop->next;
+ prop->next = np->deadprops;
+ np->deadprops = prop;
+
+ return 0;
+}
+
+void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+{
+ /* at early boot, bail here and defer setup to of_init() */
+ if (of_kset && of_node_is_attached(np))
+ sysfs_remove_bin_file(&np->kobj, &prop->attr);
+}
+
/**
* of_remove_property - Remove a property from a node.
*
@@ -1825,211 +1728,98 @@ int of_add_property(struct device_node *np, struct property *prop)
*/
int of_remove_property(struct device_node *np, struct property *prop)
{
- struct property **next;
unsigned long flags;
- int found = 0;
int rc;
- rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
- next = &np->properties;
- while (*next) {
- if (*next == prop) {
- /* found the node */
- *next = prop->next;
- prop->next = np->deadprops;
- np->deadprops = prop;
- found = 1;
- break;
- }
- next = &(*next)->next;
- }
+ rc = __of_remove_property(np, prop);
raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (!found)
- return -ENODEV;
+ if (!rc)
+ __of_remove_property_sysfs(np, prop);
- /* at early boot, bail hear and defer setup to of_init() */
- if (!of_kset)
- return 0;
+ mutex_unlock(&of_mutex);
- sysfs_remove_bin_file(&np->kobj, &prop->attr);
+ if (!rc)
+ of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL);
- return 0;
+ return rc;
}
-/*
- * of_update_property - Update a property in a node, if the property does
- * not exist, add it.
- *
- * Note that we don't actually remove it, since we have given out
- * who-knows-how-many pointers to the data using get-property.
- * Instead we just move the property to the "dead properties" list,
- * and add the new property to the property list
- */
-int of_update_property(struct device_node *np, struct property *newprop)
+int __of_update_property(struct device_node *np, struct property *newprop,
+ struct property **oldpropp)
{
struct property **next, *oldprop;
- unsigned long flags;
- int rc;
-
- rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
- if (rc)
- return rc;
- if (!newprop->name)
- return -EINVAL;
+ for (next = &np->properties; *next; next = &(*next)->next) {
+ if (of_prop_cmp((*next)->name, newprop->name) == 0)
+ break;
+ }
+ *oldpropp = oldprop = *next;
- raw_spin_lock_irqsave(&devtree_lock, flags);
- next = &np->properties;
- oldprop = __of_find_property(np, newprop->name, NULL);
- if (!oldprop) {
- /* add the new node */
- rc = __of_add_property(np, newprop);
- } else while (*next) {
+ if (oldprop) {
/* replace the node */
- if (*next == oldprop) {
- newprop->next = oldprop->next;
- *next = newprop;
- oldprop->next = np->deadprops;
- np->deadprops = oldprop;
- break;
- }
- next = &(*next)->next;
+ newprop->next = oldprop->next;
+ *next = newprop;
+ oldprop->next = np->deadprops;
+ np->deadprops = oldprop;
+ } else {
+ /* new node */
+ newprop->next = NULL;
+ *next = newprop;
}
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (rc)
- return rc;
+ return 0;
+}
+
+void __of_update_property_sysfs(struct device_node *np, struct property *newprop,
+ struct property *oldprop)
+{
/* At early boot, bail out and defer setup to of_init() */
if (!of_kset)
- return 0;
+ return;
- /* Update the sysfs attribute */
if (oldprop)
sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
__of_add_property_sysfs(np, newprop);
-
- return 0;
}
-#if defined(CONFIG_OF_DYNAMIC)
/*
- * Support for dynamic device trees.
+ * of_update_property - Update a property in a node, if the property does
+ * not exist, add it.
*
- * On some platforms, the device tree can be manipulated at runtime.
- * The routines in this section support adding, removing and changing
- * device tree nodes.
- */
-
-static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
-
-int of_reconfig_notifier_register(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
-
-int of_reconfig_notifier_unregister(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
-
-int of_reconfig_notify(unsigned long action, void *p)
-{
- int rc;
-
- rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
- return notifier_to_errno(rc);
-}
-
-/**
- * of_attach_node - Plug a device node into the tree and global list.
+ * Note that we don't actually remove it, since we have given out
+ * who-knows-how-many pointers to the data using get-property.
+ * Instead we just move the property to the "dead properties" list,
+ * and add the new property to the property list
*/
-int of_attach_node(struct device_node *np)
+int of_update_property(struct device_node *np, struct property *newprop)
{
+ struct property *oldprop;
unsigned long flags;
int rc;
- rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
- if (rc)
- return rc;
-
- raw_spin_lock_irqsave(&devtree_lock, flags);
- np->sibling = np->parent->child;
- np->allnext = np->parent->allnext;
- np->parent->allnext = np;
- np->parent->child = np;
- of_node_clear_flag(np, OF_DETACHED);
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
-
- of_node_add(np);
- return 0;
-}
-
-/**
- * of_detach_node - "Unplug" a node from the device tree.
- *
- * The caller must hold a reference to the node. The memory associated with
- * the node is not freed until its refcount goes to zero.
- */
-int of_detach_node(struct device_node *np)
-{
- struct device_node *parent;
- unsigned long flags;
- int rc = 0;
+ if (!newprop->name)
+ return -EINVAL;
- rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
- if (rc)
- return rc;
+ mutex_lock(&of_mutex);
raw_spin_lock_irqsave(&devtree_lock, flags);
+ rc = __of_update_property(np, newprop, &oldprop);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (of_node_check_flag(np, OF_DETACHED)) {
- /* someone already detached it */
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- return rc;
- }
-
- parent = np->parent;
- if (!parent) {
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
- return rc;
- }
+ if (!rc)
+ __of_update_property_sysfs(np, newprop, oldprop);
- if (of_allnodes == np)
- of_allnodes = np->allnext;
- else {
- struct device_node *prev;
- for (prev = of_allnodes;
- prev->allnext != np;
- prev = prev->allnext)
- ;
- prev->allnext = np->allnext;
- }
+ mutex_unlock(&of_mutex);
- if (parent->child == np)
- parent->child = np->sibling;
- else {
- struct device_node *prevsib;
- for (prevsib = np->parent->child;
- prevsib->sibling != np;
- prevsib = prevsib->sibling)
- ;
- prevsib->sibling = np->sibling;
- }
-
- of_node_set_flag(np, OF_DETACHED);
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ if (!rc)
+ of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop);
- of_node_remove(np);
return rc;
}
-#endif /* defined(CONFIG_OF_DYNAMIC) */
static void of_alias_add(struct alias_prop *ap, struct device_node *np,
int id, const char *stem, int stem_len)
@@ -2062,9 +1852,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
of_chosen = of_find_node_by_path("/chosen@0");
if (of_chosen) {
+ /* linux,stdout-path and /aliases/stdout are for legacy compatibility */
const char *name = of_get_property(of_chosen, "stdout-path", NULL);
if (!name)
name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (IS_ENABLED(CONFIG_PPC) && !name)
+ name = of_get_property(of_aliases, "stdout", NULL);
if (name)
of_stdout = of_find_node_by_path(name);
}
@@ -2122,7 +1915,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
struct alias_prop *app;
int id = -ENODEV;
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
list_for_each_entry(app, &aliases_lookup, link) {
if (strcmp(app->stem, stem) != 0)
continue;
@@ -2132,7 +1925,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
break;
}
}
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
return id;
}
@@ -2180,20 +1973,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
EXPORT_SYMBOL_GPL(of_prop_next_string);
/**
- * of_device_is_stdout_path - check if a device node matches the
- * linux,stdout-path property
- *
- * Check if this device node matches the linux,stdout-path property
- * in the chosen node. return true if yes, false otherwise.
+ * of_console_check() - Test and setup console for DT setup
+ * @dn - Pointer to device node
+ * @name - Name to use for preferred console without index. ex. "ttyS"
+ * @index - Index to use for preferred console.
+ *
+ * Check if the given device node matches the stdout-path property in the
+ * /chosen node. If it does then register it as the preferred console and return
+ * TRUE. Otherwise return FALSE.
*/
-int of_device_is_stdout_path(struct device_node *dn)
+bool of_console_check(struct device_node *dn, char *name, int index)
{
- if (!of_stdout)
+ if (!dn || dn != of_stdout || console_set_on_cmdline)
return false;
-
- return of_stdout == dn;
+ return add_preferred_console(name, index, NULL);
}
-EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+EXPORT_SYMBOL_GPL(of_console_check);
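For illustration only (not part of the patch): a hypothetical serial driver hook using the new of_console_check() helper, mirroring what the serial_core.c hunk further down does in uart_add_one_port():

	static void demo_setup_console(struct uart_port *port)
	{
		/* If this port's node is the /chosen stdout and no console= was
		 * given on the command line, make it the preferred console. */
		if (port->dev && port->cons)
			of_console_check(port->dev->of_node, port->cons->name,
					 port->line);
	}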
/**
* of_find_next_cache_node - Find a node's subsidiary cache
diff --git a/drivers/of/device.c b/drivers/of/device.c
index dafb9736ab9..46d6c75c140 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen);
seen = 0;
- mutex_lock(&of_aliases_mutex);
+ mutex_lock(&of_mutex);
list_for_each_entry(app, &aliases_lookup, link) {
if (dev->of_node == app->np) {
add_uevent_var(env, "OF_ALIAS_%d=%s", seen,
@@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
seen++;
}
}
- mutex_unlock(&of_aliases_mutex);
+ mutex_unlock(&of_mutex);
}
int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
new file mode 100644
index 00000000000..54fecc49a1f
--- /dev/null
+++ b/drivers/of/dynamic.c
@@ -0,0 +1,660 @@
+/*
+ * Support for dynamic device trees.
+ *
+ * On some platforms, the device tree can be manipulated at runtime.
+ * The routines in this section support adding, removing and changing
+ * device tree nodes.
+ */
+
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+
+#include "of_private.h"
+
+/**
+ * of_node_get() - Increment refcount of a node
+ * @node: Node to inc refcount, NULL is supported to simplify writing of
+ * callers
+ *
+ * Returns node.
+ */
+struct device_node *of_node_get(struct device_node *node)
+{
+ if (node)
+ kobject_get(&node->kobj);
+ return node;
+}
+EXPORT_SYMBOL(of_node_get);
+
+/**
+ * of_node_put() - Decrement refcount of a node
+ * @node: Node to dec refcount, NULL is supported to simplify writing of
+ * callers
+ */
+void of_node_put(struct device_node *node)
+{
+ if (node)
+ kobject_put(&node->kobj);
+}
+EXPORT_SYMBOL(of_node_put);
+
+void __of_detach_node_sysfs(struct device_node *np)
+{
+ struct property *pp;
+
+ BUG_ON(!of_node_is_initialized(np));
+ if (!of_kset)
+ return;
+
+ /* only remove properties if on sysfs */
+ if (of_node_is_attached(np)) {
+ for_each_property_of_node(np, pp)
+ sysfs_remove_bin_file(&np->kobj, &pp->attr);
+ kobject_del(&np->kobj);
+ }
+
+ /* finally remove the kobj_init ref */
+ of_node_put(np);
+}
+
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+ int rc;
+
+ rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+ return notifier_to_errno(rc);
+}
+
+int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *oldprop)
+{
+ struct of_prop_reconfig pr;
+
+ /* only call notifiers if the node is attached */
+ if (!of_node_is_attached(np))
+ return 0;
+
+ pr.dn = np;
+ pr.prop = prop;
+ pr.old_prop = oldprop;
+ return of_reconfig_notify(action, &pr);
+}
+
+void __of_attach_node(struct device_node *np)
+{
+ const __be32 *phandle;
+ int sz;
+
+ np->name = __of_get_property(np, "name", NULL) ? : "<NULL>";
+ np->type = __of_get_property(np, "device_type", NULL) ? : "<NULL>";
+
+ phandle = __of_get_property(np, "phandle", &sz);
+ if (!phandle)
+ phandle = __of_get_property(np, "linux,phandle", &sz);
+	if (IS_ENABLED(CONFIG_PPC_PSERIES) && !phandle)
+ phandle = __of_get_property(np, "ibm,phandle", &sz);
+ np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
+
+ np->child = NULL;
+ np->sibling = np->parent->child;
+ np->allnext = np->parent->allnext;
+ np->parent->allnext = np;
+ np->parent->child = np;
+ of_node_clear_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_attach_node() - Plug a device node into the tree and global list.
+ */
+int of_attach_node(struct device_node *np)
+{
+ unsigned long flags;
+
+ mutex_lock(&of_mutex);
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ __of_attach_node(np);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ __of_attach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
+
+ of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+
+ return 0;
+}
+
+void __of_detach_node(struct device_node *np)
+{
+ struct device_node *parent;
+
+ if (WARN_ON(of_node_check_flag(np, OF_DETACHED)))
+ return;
+
+ parent = np->parent;
+ if (WARN_ON(!parent))
+ return;
+
+ if (of_allnodes == np)
+ of_allnodes = np->allnext;
+ else {
+ struct device_node *prev;
+ for (prev = of_allnodes;
+ prev->allnext != np;
+ prev = prev->allnext)
+ ;
+ prev->allnext = np->allnext;
+ }
+
+ if (parent->child == np)
+ parent->child = np->sibling;
+ else {
+ struct device_node *prevsib;
+ for (prevsib = np->parent->child;
+ prevsib->sibling != np;
+ prevsib = prevsib->sibling)
+ ;
+ prevsib->sibling = np->sibling;
+ }
+
+ of_node_set_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_detach_node() - "Unplug" a node from the device tree.
+ *
+ * The caller must hold a reference to the node. The memory associated with
+ * the node is not freed until its refcount goes to zero.
+ */
+int of_detach_node(struct device_node *np)
+{
+ unsigned long flags;
+ int rc = 0;
+
+ mutex_lock(&of_mutex);
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ __of_detach_node(np);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ __of_detach_node_sysfs(np);
+ mutex_unlock(&of_mutex);
+
+ of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+
+ return rc;
+}
+
+/**
+ * of_node_release() - release a dynamically allocated node
+ * @kref: kref element of the node to be released
+ *
+ * In of_node_put() this function is passed to kref_put() as the destructor.
+ */
+void of_node_release(struct kobject *kobj)
+{
+ struct device_node *node = kobj_to_device_node(kobj);
+ struct property *prop = node->properties;
+
+ /* We should never be releasing nodes that haven't been detached. */
+ if (!of_node_check_flag(node, OF_DETACHED)) {
+ pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
+ dump_stack();
+ return;
+ }
+
+ if (!of_node_check_flag(node, OF_DYNAMIC))
+ return;
+
+ while (prop) {
+ struct property *next = prop->next;
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+ prop = next;
+
+ if (!prop) {
+ prop = node->deadprops;
+ node->deadprops = NULL;
+ }
+ }
+ kfree(node->full_name);
+ kfree(node->data);
+ kfree(node);
+}
+
+/**
+ * __of_prop_dup - Copy a property dynamically.
+ * @prop: Property to copy
+ * @allocflags: Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Copy a property by dynamically allocating the memory of both the
+ * property stucture and the property name & contents. The property's
+ * flags have the OF_DYNAMIC bit set so that we can differentiate between
+ * dynamically allocated properties and not.
+ * Returns the newly allocated property or NULL on out of memory error.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
+{
+ struct property *new;
+
+ new = kzalloc(sizeof(*new), allocflags);
+ if (!new)
+ return NULL;
+
+ /*
+ * NOTE: There is no check for zero length value.
+ * In case of a boolean property, this will allocate a value
+ * of zero bytes. We do this to work around the use
+ * of of_get_property() calls on boolean values.
+ */
+ new->name = kstrdup(prop->name, allocflags);
+ new->value = kmemdup(prop->value, prop->length, allocflags);
+ new->length = prop->length;
+ if (!new->name || !new->value)
+ goto err_free;
+
+ /* mark the property as dynamic */
+ of_property_set_flag(new, OF_DYNAMIC);
+
+ return new;
+
+ err_free:
+ kfree(new->name);
+ kfree(new->value);
+ kfree(new);
+ return NULL;
+}
+
+/**
+ * __of_node_alloc() - Create an empty device node dynamically.
+ * @full_name: Full name of the new device node
+ * @allocflags: Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Create an empty device tree node, suitable for further modification.
+ * The node data are dynamically allocated and all the node flags
+ * have the OF_DYNAMIC & OF_DETACHED bits set.
+ * Returns the newly allocated node or NULL on out of memory error.
+ */
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
+{
+ struct device_node *node;
+
+ node = kzalloc(sizeof(*node), allocflags);
+ if (!node)
+ return NULL;
+
+ node->full_name = kstrdup(full_name, allocflags);
+ of_node_set_flag(node, OF_DYNAMIC);
+ of_node_set_flag(node, OF_DETACHED);
+ if (!node->full_name)
+ goto err_free;
+
+ of_node_init(node);
+
+ return node;
+
+ err_free:
+ kfree(node->full_name);
+ kfree(node);
+ return NULL;
+}
+
+static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
+{
+ of_node_put(ce->np);
+ list_del(&ce->node);
+ kfree(ce);
+}
+
+#ifdef DEBUG
+static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+ switch (ce->action) {
+ case OF_RECONFIG_ADD_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "ADD_PROPERTY ", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "REMOVE_PROPERTY", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ pr_debug("%p: %s %s/%s\n",
+ ce, "UPDATE_PROPERTY", ce->np->full_name,
+ ce->prop->name);
+ break;
+ case OF_RECONFIG_ATTACH_NODE:
+ pr_debug("%p: %s %s\n",
+ ce, "ATTACH_NODE ", ce->np->full_name);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ pr_debug("%p: %s %s\n",
+ ce, "DETACH_NODE ", ce->np->full_name);
+ break;
+ }
+}
+#else
+static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+ /* empty */
+}
+#endif
+
+static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
+ struct of_changeset_entry *rce)
+{
+ memcpy(rce, ce, sizeof(*rce));
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ rce->action = OF_RECONFIG_DETACH_NODE;
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ rce->action = OF_RECONFIG_ATTACH_NODE;
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ rce->action = OF_RECONFIG_REMOVE_PROPERTY;
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ rce->action = OF_RECONFIG_ADD_PROPERTY;
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ rce->old_prop = ce->prop;
+ rce->prop = ce->old_prop;
+ break;
+ }
+}
+
+static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
+{
+ struct of_changeset_entry ce_inverted;
+ int ret;
+
+ if (revert) {
+ __of_changeset_entry_invert(ce, &ce_inverted);
+ ce = &ce_inverted;
+ }
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ case OF_RECONFIG_DETACH_NODE:
+ ret = of_reconfig_notify(ce->action, ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop);
+ break;
+ default:
+ pr_err("%s: invalid devicetree changeset action: %i\n", __func__,
+ (int)ce->action);
+ return;
+ }
+
+ if (ret)
+ pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name);
+}
+
+static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+{
+ struct property *old_prop, **propp;
+ unsigned long flags;
+ int ret = 0;
+
+ __of_changeset_entry_dump(ce);
+
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ __of_attach_node(ce->np);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ __of_detach_node(ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ /* If the property is in deadprops then it must be removed */
+ for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+ if (*propp == ce->prop) {
+ *propp = ce->prop->next;
+ ce->prop->next = NULL;
+ break;
+ }
+ }
+
+ ret = __of_add_property(ce->np, ce->prop);
+ if (ret) {
+ pr_err("%s: add_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ ret = __of_remove_property(ce->np, ce->prop);
+ if (ret) {
+ pr_err("%s: remove_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ /* If the property is in deadprops then it must be removed */
+ for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+ if (*propp == ce->prop) {
+ *propp = ce->prop->next;
+ ce->prop->next = NULL;
+ break;
+ }
+ }
+
+ ret = __of_update_property(ce->np, ce->prop, &old_prop);
+ if (ret) {
+ pr_err("%s: update_property failed @%s/%s\n",
+ __func__, ce->np->full_name,
+ ce->prop->name);
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ if (ret)
+ return ret;
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+ __of_attach_node_sysfs(ce->np);
+ break;
+ case OF_RECONFIG_DETACH_NODE:
+ __of_detach_node_sysfs(ce->np);
+ break;
+ case OF_RECONFIG_ADD_PROPERTY:
+ /* ignore duplicate names */
+ __of_add_property_sysfs(ce->np, ce->prop);
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ __of_remove_property_sysfs(ce->np, ce->prop);
+ break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop);
+ break;
+ }
+
+ return 0;
+}
+
+static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce)
+{
+ struct of_changeset_entry ce_inverted;
+
+ __of_changeset_entry_invert(ce, &ce_inverted);
+ return __of_changeset_entry_apply(&ce_inverted);
+}
+
+/**
+ * of_changeset_init - Initialize a changeset for use
+ *
+ * @ocs: changeset pointer
+ *
+ * Initialize a changeset structure
+ */
+void of_changeset_init(struct of_changeset *ocs)
+{
+ memset(ocs, 0, sizeof(*ocs));
+ INIT_LIST_HEAD(&ocs->entries);
+}
+
+/**
+ * of_changeset_destroy - Destroy a changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Destroys a changeset. Note that if a changeset is applied,
+ * its changes to the tree cannot be reverted.
+ */
+void of_changeset_destroy(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce, *cen;
+
+ list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
+ __of_changeset_entry_destroy(ce);
+}
+
+/**
+ * of_changeset_apply - Applies a changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Applies a changeset to the live tree.
+ * Any side-effects of live tree state changes are applied here on
+ * sucess, like creation/destruction of devices and side-effects
+ * like creation of sysfs properties and directories.
+ * Returns 0 on success, a negative error value in case of an error.
+ * On error the partially applied effects are reverted.
+ */
+int of_changeset_apply(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce;
+ int ret;
+
+ /* perform the rest of the work */
+ pr_debug("of_changeset: applying...\n");
+ list_for_each_entry(ce, &ocs->entries, node) {
+ ret = __of_changeset_entry_apply(ce);
+ if (ret) {
+ pr_err("%s: Error applying changeset (%d)\n", __func__, ret);
+ list_for_each_entry_continue_reverse(ce, &ocs->entries, node)
+ __of_changeset_entry_revert(ce);
+ return ret;
+ }
+ }
+ pr_debug("of_changeset: applied, emitting notifiers.\n");
+
+ /* drop the global lock while emitting notifiers */
+ mutex_unlock(&of_mutex);
+ list_for_each_entry(ce, &ocs->entries, node)
+ __of_changeset_entry_notify(ce, 0);
+ mutex_lock(&of_mutex);
+ pr_debug("of_changeset: notifiers sent.\n");
+
+ return 0;
+}
+
+/**
+ * of_changeset_revert - Reverts an applied changeset
+ *
+ * @ocs: changeset pointer
+ *
+ * Reverts a changeset, returning the state of the tree to what it
+ * was before the application.
+ * Any side-effects like creation/destruction of devices and
+ * removal of sysfs properties and directories are applied.
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_revert(struct of_changeset *ocs)
+{
+ struct of_changeset_entry *ce;
+ int ret;
+
+ pr_debug("of_changeset: reverting...\n");
+ list_for_each_entry_reverse(ce, &ocs->entries, node) {
+ ret = __of_changeset_entry_revert(ce);
+ if (ret) {
+ pr_err("%s: Error reverting changeset (%d)\n", __func__, ret);
+ list_for_each_entry_continue(ce, &ocs->entries, node)
+ __of_changeset_entry_apply(ce);
+ return ret;
+ }
+ }
+ pr_debug("of_changeset: reverted, emitting notifiers.\n");
+
+ /* drop the global lock while emitting notifiers */
+ mutex_unlock(&of_mutex);
+ list_for_each_entry_reverse(ce, &ocs->entries, node)
+ __of_changeset_entry_notify(ce, 1);
+ mutex_lock(&of_mutex);
+ pr_debug("of_changeset: notifiers sent.\n");
+
+ return 0;
+}
+
+/**
+ * of_changeset_action - Perform a changeset action
+ *
+ * @ocs: changeset pointer
+ * @action: action to perform
+ * @np: Pointer to device node
+ * @prop: Pointer to property
+ *
+ * On action being one of:
+ * + OF_RECONFIG_ATTACH_NODE
+ * + OF_RECONFIG_DETACH_NODE,
+ * + OF_RECONFIG_ADD_PROPERTY
+ * + OF_RECONFIG_REMOVE_PROPERTY,
+ * + OF_RECONFIG_UPDATE_PROPERTY
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+ struct device_node *np, struct property *prop)
+{
+ struct of_changeset_entry *ce;
+
+ ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+ if (!ce) {
+ pr_err("%s: Failed to allocate\n", __func__);
+ return -ENOMEM;
+ }
+ /* get a reference to the node */
+ ce->action = action;
+ ce->np = of_node_get(np);
+ ce->prop = prop;
+
+ if (action == OF_RECONFIG_UPDATE_PROPERTY && prop)
+ ce->old_prop = of_find_property(np, prop->name, NULL);
+
+ /* add it to the list */
+ list_add_tail(&ce->node, &ocs->entries);
+ return 0;
+}
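A hedged usage sketch of the new changeset API (modeled on the of_selftest_changeset() test added later in this patch; node and property names are made up, error handling is trimmed, and of_changeset_attach_node()/of_changeset_add_property() are the thin wrappers this series adds around of_changeset_action()):

	static int demo_changeset(void)
	{
		static struct property demo_prop = {
			.name = "demo-prop", .length = 5, .value = "test",
		};
		struct of_changeset ocs;
		struct device_node *np;
		struct property *prop;
		int rc;

		np = __of_node_alloc("/demo", GFP_KERNEL);	/* OF_DYNAMIC | OF_DETACHED */
		prop = __of_prop_dup(&demo_prop, GFP_KERNEL);
		if (!np || !prop)
			return -ENOMEM;
		np->parent = of_allnodes;			/* attach under the root */

		of_changeset_init(&ocs);
		of_changeset_attach_node(&ocs, np);
		of_changeset_add_property(&ocs, np, prop);

		mutex_lock(&of_mutex);				/* apply/revert expect of_mutex held */
		rc = of_changeset_apply(&ocs);
		mutex_unlock(&of_mutex);

		/* later, of_changeset_revert(&ocs) under of_mutex undoes the changes */
		of_changeset_destroy(&ocs);
		return rc;
	}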
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9aa012e6ea0..f46a24ffa3f 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -923,24 +923,24 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
}
#ifdef CONFIG_HAVE_MEMBLOCK
+#define MAX_PHYS_ADDR ((phys_addr_t)~0)
+
void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
{
const u64 phys_offset = __pa(PAGE_OFFSET);
base &= PAGE_MASK;
size &= PAGE_MASK;
- if (sizeof(phys_addr_t) < sizeof(u64)) {
- if (base > ULONG_MAX) {
- pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
- base, base + size);
- return;
- }
+ if (base > MAX_PHYS_ADDR) {
+ pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
+ base, base + size);
+ return;
+ }
- if (base + size > ULONG_MAX) {
- pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
- ULONG_MAX, base + size);
- size = ULONG_MAX - base;
- }
+ if (base + size > MAX_PHYS_ADDR) {
+ pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
+ ULONG_MAX, base + size);
+ size = MAX_PHYS_ADDR - base;
}
if (base + size < phys_offset) {
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index ff350c8fa7a..858e0a5d9a1 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -31,6 +31,63 @@ struct alias_prop {
char stem[0];
};
-extern struct mutex of_aliases_mutex;
+extern struct mutex of_mutex;
extern struct list_head aliases_lookup;
+extern struct kset *of_kset;
+
+
+static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
+{
+ return container_of(kobj, struct device_node, kobj);
+}
+
+#if defined(CONFIG_OF_DYNAMIC)
+extern int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *old_prop);
+extern void of_node_release(struct kobject *kobj);
+#else /* CONFIG_OF_DYNAMIC */
+static inline int of_property_notify(int action, struct device_node *np,
+ struct property *prop, struct property *old_prop)
+{
+ return 0;
+}
+#endif /* CONFIG_OF_DYNAMIC */
+
+/**
+ * General utilities for working with live trees.
+ *
+ * All functions with two leading underscores operate
+ * without taking node references, so you either have to
+ * own the devtree lock or work on detached trees only.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
+
+extern const void *__of_get_property(const struct device_node *np,
+ const char *name, int *lenp);
+extern int __of_add_property(struct device_node *np, struct property *prop);
+extern int __of_add_property_sysfs(struct device_node *np,
+ struct property *prop);
+extern int __of_remove_property(struct device_node *np, struct property *prop);
+extern void __of_remove_property_sysfs(struct device_node *np,
+ struct property *prop);
+extern int __of_update_property(struct device_node *np,
+ struct property *newprop, struct property **oldprop);
+extern void __of_update_property_sysfs(struct device_node *np,
+ struct property *newprop, struct property *oldprop);
+
+extern void __of_attach_node(struct device_node *np);
+extern int __of_attach_node_sysfs(struct device_node *np);
+extern void __of_detach_node(struct device_node *np);
+extern void __of_detach_node_sysfs(struct device_node *np);
+
+/* iterators for transactions, used for overlays */
+/* forward iterator */
+#define for_each_transaction_entry(_oft, _te) \
+ list_for_each_entry(_te, &(_oft)->te_list, node)
+
+/* reverse iterator */
+#define for_each_transaction_entry_reverse(_oft, _te) \
+ list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
+
#endif /* _LINUX_OF_PRIVATE_H */
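The comment above captures the convention this series converges on: the double-underscore helpers only manipulate data structures, while the public wrappers take of_mutex and devtree_lock, update sysfs outside the spinlock, and emit notifiers last. A condensed sketch of that shape, mirroring of_add_property() in the base.c hunk above ("xxx" is a placeholder, not a real symbol):

	int of_xxx_property(struct device_node *np, struct property *prop)
	{
		unsigned long flags;
		int rc;

		mutex_lock(&of_mutex);				/* serialises all tree modifications */

		raw_spin_lock_irqsave(&devtree_lock, flags);
		rc = __of_xxx_property(np, prop);		/* pure list manipulation */
		raw_spin_unlock_irqrestore(&devtree_lock, flags);

		if (!rc)
			__of_xxx_property_sysfs(np, prop);	/* may sleep, so outside the spinlock */

		mutex_unlock(&of_mutex);

		if (!rc)
			of_property_notify(OF_RECONFIG_XXX_PROPERTY, np, prop, NULL);

		return rc;
	}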
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 632aae86137..59fb12e84e6 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void)
for (i = 0; i < reserved_mem_count; i++) {
struct reserved_mem *rmem = &reserved_mem[i];
unsigned long node = rmem->fdt_node;
+ int len;
+ const __be32 *prop;
int err = 0;
+ prop = of_get_flat_dt_prop(node, "phandle", &len);
+ if (!prop)
+ prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
+ if (prop)
+ rmem->phandle = of_read_number(prop, len/4);
+
if (rmem->size == 0)
err = __reserved_mem_alloc_size(node, rmem->name,
&rmem->base, &rmem->size);
@@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void)
__reserved_mem_init_node(rmem);
}
}
+
+static inline struct reserved_mem *__find_rmem(struct device_node *node)
+{
+ unsigned int i;
+
+ if (!node->phandle)
+ return NULL;
+
+ for (i = 0; i < reserved_mem_count; i++)
+ if (reserved_mem[i].phandle == node->phandle)
+ return &reserved_mem[i];
+ return NULL;
+}
+
+/**
+ * of_reserved_mem_device_init() - assign reserved memory region to given device
+ *
+ * This function assigns the memory region pointed to by the "memory-region" device tree
+ * property to the given device.
+ */
+void of_reserved_mem_device_init(struct device *dev)
+{
+ struct reserved_mem *rmem;
+ struct device_node *np;
+
+ np = of_parse_phandle(dev->of_node, "memory-region", 0);
+ if (!np)
+ return;
+
+ rmem = __find_rmem(np);
+ of_node_put(np);
+
+ if (!rmem || !rmem->ops || !rmem->ops->device_init)
+ return;
+
+ rmem->ops->device_init(rmem, dev);
+ dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
+}
+
+/**
+ * of_reserved_mem_device_release() - release reserved memory device structures
+ *
+ * This function releases structures allocated for memory region handling for
+ * the given device.
+ */
+void of_reserved_mem_device_release(struct device *dev)
+{
+ struct reserved_mem *rmem;
+ struct device_node *np;
+
+ np = of_parse_phandle(dev->of_node, "memory-region", 0);
+ if (!np)
+ return;
+
+ rmem = __find_rmem(np);
+ of_node_put(np);
+
+ if (!rmem || !rmem->ops || !rmem->ops->device_release)
+ return;
+
+ rmem->ops->device_release(rmem, dev);
+}
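For illustration (hypothetical driver, not part of the patch): a device whose node carries a "memory-region" phandle would typically call the two helpers above from its probe and remove paths:

	static int demo_probe(struct platform_device *pdev)
	{
		/* bind this device to its reserved-memory region, if it has one */
		of_reserved_mem_device_init(&pdev->dev);
		/* ... remaining probe work ... */
		return 0;
	}

	static int demo_remove(struct platform_device *pdev)
	{
		of_reserved_mem_device_release(&pdev->dev);
		return 0;
	}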
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 500436f9be7..0197725e033 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus,
break;
}
}
+ of_node_set_flag(bus, OF_POPULATED_BUS);
return rc;
}
@@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate);
static int of_platform_device_destroy(struct device *dev, void *data)
{
- bool *children_left = data;
-
/* Do not touch devices not populated from the device tree */
- if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) {
- *children_left = true;
+ if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
return 0;
- }
- /* Recurse, but don't touch this device if it has any children left */
- if (of_platform_depopulate(dev) != 0) {
- *children_left = true;
- return 0;
- }
+ /* Recurse for any nodes that were treated as busses */
+ if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS))
+ device_for_each_child(dev, NULL, of_platform_device_destroy);
if (dev->bus == &platform_bus_type)
platform_device_unregister(to_platform_device(dev));
@@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data)
else if (dev->bus == &amba_bustype)
amba_device_unregister(to_amba_device(dev));
#endif
- else {
- *children_left = true;
- return 0;
- }
of_node_clear_flag(dev->of_node, OF_POPULATED);
-
+ of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
return 0;
}
/**
* of_platform_depopulate() - Remove devices populated from device tree
- * @parent: device which childred will be removed
+ * @parent: device which children will be removed
*
* Complementary to of_platform_populate(), this function removes children
* of the given device (and, recurrently, their children) that have been
@@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data)
* Returns 0 when all children devices have been removed or
* -EBUSY when some children remained.
*/
-int of_platform_depopulate(struct device *parent)
+void of_platform_depopulate(struct device *parent)
{
- bool children_left = false;
-
- device_for_each_child(parent, &children_left,
- of_platform_device_destroy);
-
- return children_left ? -EBUSY : 0;
+ device_for_each_child(parent, NULL, of_platform_device_destroy);
}
EXPORT_SYMBOL_GPL(of_platform_depopulate);
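With the change above, depopulation can no longer fail, so a bus-master driver can simply pair the two calls; a hedged sketch with a hypothetical driver:

	static int demo_bus_probe(struct platform_device *pdev)
	{
		/* create platform devices for every child of this node */
		return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
	}

	static int demo_bus_remove(struct platform_device *pdev)
	{
		/* tears down everything of_platform_populate() created, recursing
		 * into children of nodes flagged OF_POPULATED_BUS */
		of_platform_depopulate(&pdev->dev);
		return 0;
	}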
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 077314eebb9..d4100266783 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/list.h>
@@ -16,11 +17,17 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include "of_private.h"
+
static struct selftest_results {
int passed;
int failed;
} selftest_results;
+#define NO_OF_NODES 2
+static struct device_node *nodes[NO_OF_NODES];
+static int last_node_index;
+
#define selftest(result, fmt, ...) { \
if (!(result)) { \
selftest_results.failed++; \
@@ -266,6 +273,81 @@ static void __init of_selftest_property_match_string(void)
selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
}
+#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \
+ (p1)->value && (p2)->value && \
+ !memcmp((p1)->value, (p2)->value, (p1)->length) && \
+ !strcmp((p1)->name, (p2)->name))
+static void __init of_selftest_property_copy(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+ struct property p1 = { .name = "p1", .length = 0, .value = "" };
+ struct property p2 = { .name = "p2", .length = 5, .value = "abcd" };
+ struct property *new;
+
+ new = __of_prop_dup(&p1, GFP_KERNEL);
+ selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n");
+ kfree(new->value);
+ kfree(new->name);
+ kfree(new);
+
+ new = __of_prop_dup(&p2, GFP_KERNEL);
+ selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n");
+ kfree(new->value);
+ kfree(new->name);
+ kfree(new);
+#endif
+}
+
+static void __init of_selftest_changeset(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+ struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
+ struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
+ struct property *ppremove;
+ struct device_node *n1, *n2, *n21, *nremove, *parent;
+ struct of_changeset chgset;
+
+ of_changeset_init(&chgset);
+ n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
+ selftest(n1, "testcase setup failure\n");
+ n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
+ selftest(n2, "testcase setup failure\n");
+ n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
+ selftest(n21, "testcase setup failure %p\n", n21);
+ nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
+ selftest(nremove, "testcase setup failure\n");
+ ppadd = __of_prop_dup(&padd, GFP_KERNEL);
+ selftest(ppadd, "testcase setup failure\n");
+ ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL);
+ selftest(ppupdate, "testcase setup failure\n");
+ parent = nremove->parent;
+ n1->parent = parent;
+ n2->parent = parent;
+ n21->parent = n2;
+ n2->child = n21;
+ ppremove = of_find_property(parent, "prop-remove", NULL);
+ selftest(ppremove, "failed to find removal prop");
+
+ of_changeset_init(&chgset);
+ selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n");
+ selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n");
+ selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n");
+ selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n");
+ selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n");
+ selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
+ selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
+ mutex_lock(&of_mutex);
+ selftest(!of_changeset_apply(&chgset), "apply failed\n");
+ mutex_unlock(&of_mutex);
+
+ mutex_lock(&of_mutex);
+ selftest(!of_changeset_revert(&chgset), "revert failed\n");
+ mutex_unlock(&of_mutex);
+
+ of_changeset_destroy(&chgset);
+#endif
+}
+
static void __init of_selftest_parse_interrupts(void)
{
struct device_node *np;
@@ -517,9 +599,156 @@ static void __init of_selftest_platform_populate(void)
}
}
+/**
+ * update_node_properties - adds the properties
+ * of np to the dup node (present in the live tree) and
+ * updates the parent of np's children to dup.
+ *
+ * @np: node already present in live tree
+ * @dup: node present in live tree to be updated
+ */
+static void update_node_properties(struct device_node *np,
+ struct device_node *dup)
+{
+ struct property *prop;
+ struct device_node *child;
+
+ for_each_property_of_node(np, prop)
+ of_add_property(dup, prop);
+
+ for_each_child_of_node(np, child)
+ child->parent = dup;
+}
+
+/**
+ * attach_node_and_children - attaches a node
+ * and its children to the live tree
+ *
+ * @np: Node to attach to live tree
+ */
+static int attach_node_and_children(struct device_node *np)
+{
+ struct device_node *next, *root = np, *dup;
+
+ if (!np) {
+ pr_warn("%s: No tree to attach; not running tests\n",
+ __func__);
+ return -ENODATA;
+ }
+
+
+ /* skip root node */
+ np = np->child;
+ /* storing a copy in temporary node */
+ dup = np;
+
+ while (dup) {
+ nodes[last_node_index++] = dup;
+ dup = dup->sibling;
+ }
+ dup = NULL;
+
+ while (np) {
+ next = np->allnext;
+ dup = of_find_node_by_path(np->full_name);
+ if (dup)
+ update_node_properties(np, dup);
+ else {
+ np->child = NULL;
+ if (np->parent == root)
+ np->parent = of_allnodes;
+ of_attach_node(np);
+ }
+ np = next;
+ }
+
+ return 0;
+}
+
+/**
+ * selftest_data_add - Reads and copies data from the
+ * linked tree and attaches it to the live tree
+ */
+static int __init selftest_data_add(void)
+{
+ void *selftest_data;
+ struct device_node *selftest_data_node;
+ extern uint8_t __dtb_testcases_begin[];
+ extern uint8_t __dtb_testcases_end[];
+ const int size = __dtb_testcases_end - __dtb_testcases_begin;
+
+ if (!size || !of_allnodes) {
+ pr_warn("%s: No testcase data to attach; not running tests\n",
+ __func__);
+ return -ENODATA;
+ }
+
+ /* creating copy */
+ selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL);
+
+ if (!selftest_data) {
+ pr_warn("%s: Failed to allocate memory for selftest_data; "
+ "not running tests\n", __func__);
+ return -ENOMEM;
+ }
+ of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
+
+ /* attach the sub-tree to live tree */
+ return attach_node_and_children(selftest_data_node);
+}
+
+/**
+ * detach_node_and_children - detaches a node
+ * and its children from the live tree
+ *
+ * @np: Node to detach from live tree
+ */
+static void detach_node_and_children(struct device_node *np)
+{
+ while (np->child)
+ detach_node_and_children(np->child);
+
+ while (np->sibling)
+ detach_node_and_children(np->sibling);
+
+ of_detach_node(np);
+}
+
+/**
+ * selftest_data_remove - removes the selftest data
+ * nodes from the live tree
+ */
+static void selftest_data_remove(void)
+{
+ struct device_node *np;
+ struct property *prop;
+
+ while (last_node_index >= 0) {
+ if (nodes[last_node_index]) {
+ np = of_find_node_by_path(nodes[last_node_index]->full_name);
+ if (strcmp(np->full_name, "/aliases") != 0) {
+ detach_node_and_children(np->child);
+ of_detach_node(np);
+ } else {
+ for_each_property_of_node(np, prop) {
+ if (strcmp(prop->name, "testcase-alias") == 0)
+ of_remove_property(np, prop);
+ }
+ }
+ }
+ last_node_index--;
+ }
+}
+
static int __init of_selftest(void)
{
struct device_node *np;
+ int res;
+
+ /* adding data for selftest */
+ res = selftest_data_add();
+ if (res)
+ return res;
np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
if (!np) {
@@ -533,12 +762,18 @@ static int __init of_selftest(void)
of_selftest_dynamic();
of_selftest_parse_phandle_with_args();
of_selftest_property_match_string();
+ of_selftest_property_copy();
+ of_selftest_changeset();
of_selftest_parse_interrupts();
of_selftest_parse_interrupts_extended();
of_selftest_match_node();
of_selftest_platform_populate();
pr_info("end of selftest - %i passed, %i failed\n",
selftest_results.passed, selftest_results.failed);
+
+ /* removing selftest data from live tree */
+ selftest_data_remove();
+
return 0;
}
late_initcall(of_selftest);
diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts
new file mode 100644
index 00000000000..219ef9324e9
--- /dev/null
+++ b/drivers/of/testcase-data/testcases.dts
@@ -0,0 +1,15 @@
+/dts-v1/;
+/ {
+ testcase-data {
+ changeset {
+ prop-update = "hello";
+ prop-remove = "world";
+ node-remove {
+ };
+ };
+ };
+};
+#include "tests-phandle.dtsi"
+#include "tests-interrupts.dtsi"
+#include "tests-match.dtsi"
+#include "tests-platform.dtsi"
diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi
deleted file mode 100644
index 6d8d980ac85..00000000000
--- a/drivers/of/testcase-data/testcases.dtsi
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "tests-phandle.dtsi"
-#include "tests-interrupts.dtsi"
-#include "tests-match.dtsi"
-#include "tests-platform.dtsi"
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 93aa29f6d39..f2945fa73d4 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -375,11 +375,11 @@ static void __exit cleanup_slots(void)
static int __init rpaphp_init(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
- while ((dn = of_find_node_by_name(dn, "pci")))
+ for_each_node_by_name(dn, "pci")
rpaphp_add_slot(dn);
return 0;
diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig
index 6bbc36fbd6e..e4603985dce 100644
--- a/drivers/scsi/cxgbi/cxgb3i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig
@@ -1,6 +1,6 @@
config SCSI_CXGB3_ISCSI
tristate "Chelsio T3 iSCSI support"
- depends on PCI && INET
+ depends on PCI && INET && (IPV6 || IPV6=n)
select NETDEVICES
select ETHERNET
select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 16b2c7d2661..8c4e423037b 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,6 +1,6 @@
config SCSI_CXGB4_ISCSI
tristate "Chelsio T4 iSCSI support"
- depends on PCI && INET
+ depends on PCI && INET && (IPV6 || IPV6=n)
select NETDEVICES
select ETHERNET
select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 43fea2219f8..ae45bd99bae 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -472,7 +472,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
if (delay > 0)
queue_delayed_work(system_long_wq, &rport->reconnect_work,
1UL * delay * HZ);
- if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
+ if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) &&
+ srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
rport->state);
scsi_target_block(&shost->shost_gendev);
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 0419b69e270..4f485e88f60 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc)
*
* The byte channel to be used for the console is specified via a "stdout"
* property in the /chosen node.
- *
- * For compatible with legacy device trees, we also look for a "stdout" alias.
*/
static int find_console_handle(void)
{
- struct device_node *np, *np2;
+ struct device_node *np = of_stdout;
const char *sprop = NULL;
const uint32_t *iprop;
- np = of_find_node_by_path("/chosen");
- if (np)
- sprop = of_get_property(np, "stdout-path", NULL);
-
- if (!np || !sprop) {
- of_node_put(np);
- np = of_find_node_by_name(NULL, "aliases");
- if (np)
- sprop = of_get_property(np, "stdout", NULL);
- }
-
- if (!sprop) {
- of_node_put(np);
- return 0;
- }
-
/* We don't care what the aliased node is actually called. We only
* care if it's compatible with "epapr,hv-byte-channel", because that
- * indicates that it's a byte channel node. We use a temporary
- * variable, 'np2', because we can't release 'np' until we're done with
- * 'sprop'.
+ * indicates that it's a byte channel node.
*/
- np2 = of_find_node_by_path(sprop);
- of_node_put(np);
- np = np2;
- if (!np) {
- pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
- return 0;
- }
-
- /* Is it a byte channel? */
- if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
- of_node_put(np);
+ if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel"))
return 0;
- }
stdout_irq = irq_of_parse_and_map(np, 0);
if (stdout_irq == NO_IRQ) {
- pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop);
- of_node_put(np);
+ pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name);
return 0;
}
@@ -167,12 +135,9 @@ static int find_console_handle(void)
if (!iprop) {
pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
np->name);
- of_node_put(np);
return 0;
}
stdout_bc = be32_to_cpu(*iprop);
-
- of_node_put(np);
return 1;
}
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index a585079b4b3..a2cc5f834c6 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -342,22 +342,13 @@ static void udbg_init_opal_common(void)
void __init hvc_opal_init_early(void)
{
- struct device_node *stdout_node = NULL;
+ struct device_node *stdout_node = of_node_get(of_stdout);
const __be32 *termno;
- const char *name = NULL;
const struct hv_ops *ops;
u32 index;
- /* find the boot console from /chosen/stdout */
- if (of_chosen)
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name) {
- stdout_node = of_find_node_by_path(name);
- if (!stdout_node) {
- pr_err("hvc_opal: Failed to locate default console!\n");
- return;
- }
- } else {
+ /* If the console wasn't in /chosen, try /ibm,opal */
+ if (!stdout_node) {
struct device_node *opal, *np;
/* Current OPAL takeover doesn't provide the stdout
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index b594abfbf21..5618b5fc750 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -404,42 +404,35 @@ module_exit(hvc_vio_exit);
void __init hvc_vio_init_early(void)
{
- struct device_node *stdout_node;
const __be32 *termno;
const char *name;
const struct hv_ops *ops;
/* find the boot console from /chosen/stdout */
- if (!of_chosen)
+ if (!of_stdout)
return;
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name == NULL)
- return;
- stdout_node = of_find_node_by_path(name);
- if (!stdout_node)
- return;
- name = of_get_property(stdout_node, "name", NULL);
+ name = of_get_property(of_stdout, "name", NULL);
if (!name) {
printk(KERN_WARNING "stdout node missing 'name' property!\n");
- goto out;
+ return;
}
/* Check if it's a virtual terminal */
if (strncmp(name, "vty", 3) != 0)
- goto out;
- termno = of_get_property(stdout_node, "reg", NULL);
+ return;
+ termno = of_get_property(of_stdout, "reg", NULL);
if (termno == NULL)
- goto out;
+ return;
hvterm_priv0.termno = of_read_number(termno, 1);
spin_lock_init(&hvterm_priv0.buf_lock);
hvterm_privs[0] = &hvterm_priv0;
/* Check the protocol */
- if (of_device_is_compatible(stdout_node, "hvterm1")) {
+ if (of_device_is_compatible(of_stdout, "hvterm1")) {
hvterm_priv0.proto = HV_PROTOCOL_RAW;
ops = &hvterm_raw_ops;
}
- else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
+ else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) {
hvterm_priv0.proto = HV_PROTOCOL_HVSI;
ops = &hvterm_hvsi_ops;
hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
@@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void)
/* HVSI, perform the handshake now */
hvsilib_establish(&hvterm_priv0.hvsi);
} else
- goto out;
+ return;
udbg_putc = udbg_hvc_putc;
udbg_getc = udbg_hvc_getc;
udbg_getc_poll = udbg_hvc_getc_poll;
@@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void)
* backend for HVSI, only do udbg
*/
if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
- goto out;
+ return;
#endif
/* Check whether the user has requested a different console. */
if (!strstr(cmd_line, "console="))
add_preferred_console("hvc", 0, NULL);
hvc_instantiate(0, 0, ops);
-out:
- of_node_put(stdout_node);
}
/* call this from early_init() for a working debug console on
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index f7ad5b90305..abbfedb8490 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -1653,8 +1653,7 @@ static int __init pmz_probe(void)
/*
* Find all escc chips in the system
*/
- node_p = of_find_node_by_name(NULL, "escc");
- while (node_p) {
+ for_each_node_by_name(node_p, "escc") {
/*
* First get channel A/B node pointers
*
@@ -1672,7 +1671,7 @@ static int __init pmz_probe(void)
of_node_put(node_b);
printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n",
(!node_a) ? 'a' : 'b', node_p->full_name);
- goto next;
+ continue;
}
/*
@@ -1699,11 +1698,9 @@ static int __init pmz_probe(void)
of_node_put(node_b);
memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port));
memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port));
- goto next;
+ continue;
}
count += 2;
-next:
- node_p = of_find_node_by_name(node_p, "escc");
}
pmz_ports_count = count;
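
The conversion to for_each_node_by_name() works because the iterator is a thin wrapper around the lookup the old code open-coded: of_find_node_by_name() drops the reference on the node it is handed before returning the next match, so the manual "goto next" re-lookup collapses into a plain continue. Roughly, from its definition in <linux/of.h> (shown for reference):

	#define for_each_node_by_name(dn, name) \
		for (dn = of_find_node_by_name(NULL, name); dn; \
		     dn = of_find_node_by_name(dn, name))

One obligation remains: code that breaks out of the loop early must still of_node_put() the node it holds at that point, since the iterator only releases a node when it advances past it.
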
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 8bb19da0163..29a7be47389 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/console.h>
+#include <linux/of.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
@@ -2611,6 +2612,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
spin_lock_init(&uport->lock);
lockdep_set_class(&uport->lock, &port_lock_key);
}
+ if (uport->cons && uport->dev)
+ of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
uart_configure_port(drv, state, uport);
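
The one-line hook above is what ties a UART's device_node back to the firmware stdout selection: if the port being registered is the node that /chosen's stdout-path points at, and the user did not force a console= on the command line, the port is registered as a preferred console. A simplified sketch of the semantics relied on here (not the exact implementation; console_set_on_cmdline and add_preferred_console() are existing kernel symbols):

	/* Simplified sketch of the of_console_check() semantics used above. */
	bool of_console_check(struct device_node *dn, char *name, int index)
	{
		if (!dn || dn != of_stdout || console_set_on_cmdline)
			return false;
		return !add_preferred_console(name, index, NULL);
	}
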
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204b921..d8c57636b9c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
depends on VFIO && SPAPR_TCE_IOMMU
default n
+config VFIO_SPAPR_EEH
+ tristate
+ depends on EEH && VFIO_IOMMU_SPAPR_TCE
+ default n
+
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if X86
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+ select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc75e8..0b035b12600 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
obj-$(CONFIG_VFIO) += vfio.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f36e3..f7825332a32 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
"Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
+static DEFINE_MUTEX(driver_lock);
+
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
+
static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
@@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
u16 cmd;
u8 msix_pos;
+ /* Don't allow our initial saved state to include busmaster */
+ pci_clear_master(pdev);
+
ret = pci_enable_device(pdev);
if (ret)
return ret;
@@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
struct pci_dev *pdev = vdev->pdev;
int bar;
- pci_disable_device(pdev);
+ /* Stop the device from further DMA */
+ pci_clear_master(pdev);
vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->barmap[bar] = NULL;
}
+ vdev->needs_reset = true;
+
/*
* If we have saved state, restore it. If we can reset the device,
* even better. Resetting with current state seems better than
@@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
__func__, dev_name(&pdev->dev));
if (!vdev->reset_works)
- return;
+ goto out;
pci_save_state(pdev);
}
@@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
if (ret)
pr_warn("%s: Failed to reset device %s (%d)\n",
__func__, dev_name(&pdev->dev), ret);
+ else
+ vdev->needs_reset = false;
}
pci_restore_state(pdev);
+out:
+ pci_disable_device(pdev);
+
+ vfio_pci_try_bus_reset(vdev);
}
static void vfio_pci_release(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
- if (atomic_dec_and_test(&vdev->refcnt)) {
+ mutex_lock(&driver_lock);
+
+ if (!(--vdev->refcnt)) {
vfio_spapr_pci_eeh_release(vdev->pdev);
vfio_pci_disable(vdev);
}
+ mutex_unlock(&driver_lock);
+
module_put(THIS_MODULE);
}
static int vfio_pci_open(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
- int ret;
+ int ret = 0;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- if (atomic_inc_return(&vdev->refcnt) == 1) {
+ mutex_lock(&driver_lock);
+
+ if (!vdev->refcnt) {
ret = vfio_pci_enable(vdev);
if (ret)
goto error;
- ret = vfio_spapr_pci_eeh_open(vdev->pdev);
- if (ret) {
- vfio_pci_disable(vdev);
- goto error;
- }
+ vfio_spapr_pci_eeh_open(vdev->pdev);
}
-
- return 0;
+ vdev->refcnt++;
error:
- module_put(THIS_MODULE);
+ mutex_unlock(&driver_lock);
+ if (ret)
+ module_put(THIS_MODULE);
return ret;
}
@@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
spin_lock_init(&vdev->irqlock);
- atomic_set(&vdev->refcnt, 0);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
@@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev)
{
struct vfio_pci_device *vdev;
+ mutex_lock(&driver_lock);
+
vdev = vfio_del_group_dev(&pdev->dev);
- if (!vdev)
- return;
+ if (vdev) {
+ iommu_group_put(pdev->dev.iommu_group);
+ kfree(vdev);
+ }
- iommu_group_put(pdev->dev.iommu_group);
- kfree(vdev);
+ mutex_unlock(&driver_lock);
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = {
.err_handler = &vfio_err_handlers,
};
+/*
+ * Test whether a reset is necessary and possible. We mark devices as
+ * needs_reset when they are released, but don't have a function-local reset
+ * available. If any of these exist in the affected devices, we want to do
+ * a bus/slot reset. We also need all of the affected devices to be unused,
+ * so we abort if any device has a non-zero refcnt. driver_lock prevents a
+ * device from being opened during the scan or unbound from vfio-pci.
+ */
+static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
+{
+ bool *needs_reset = data;
+ struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+ int ret = -EBUSY;
+
+ if (pci_drv == &vfio_pci_driver) {
+ struct vfio_device *device;
+ struct vfio_pci_device *vdev;
+
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return ret;
+
+ vdev = vfio_device_data(device);
+ if (vdev) {
+ if (vdev->needs_reset)
+ *needs_reset = true;
+
+ if (!vdev->refcnt)
+ ret = 0;
+ }
+
+ vfio_device_put(device);
+ }
+
+ /*
+ * TODO: vfio-core considers groups to be viable even if some devices
+ * are attached to known drivers, like pci-stub or pcieport. We can't
+ * freeze devices from being unbound to those drivers like we can
+ * here though, so it would be racy to test for them. We also can't
+ * use device_lock() to prevent changes as that would interfere with
+ * PCI-core taking device_lock during bus reset. For now, we require
+ * devices to be bound to vfio-pci to get a bus/slot reset on release.
+ */
+
+ return ret;
+}
+
+/* Clear needs_reset on all affected devices after successful bus/slot reset */
+static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
+{
+ struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+
+ if (pci_drv == &vfio_pci_driver) {
+ struct vfio_device *device;
+ struct vfio_pci_device *vdev;
+
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return 0;
+
+ vdev = vfio_device_data(device);
+ if (vdev)
+ vdev->needs_reset = false;
+
+ vfio_device_put(device);
+ }
+
+ return 0;
+}
+
+/*
+ * Attempt to do a bus/slot reset if there are devices affected by a reset for
+ * this device that are needs_reset and all of the affected devices are unused
+ * (!refcnt). Callers of this function are required to hold driver_lock such
+ * that devices can not be unbound from vfio-pci or opened by a user while we
+ * test for and perform a bus/slot reset.
+ */
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
+{
+ bool needs_reset = false, slot = false;
+ int ret;
+
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return;
+
+ if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_test_bus_reset,
+ &needs_reset, slot) || !needs_reset)
+ return;
+
+ if (slot)
+ ret = pci_try_reset_slot(vdev->pdev->slot);
+ else
+ ret = pci_try_reset_bus(vdev->pdev->bus);
+
+ if (ret)
+ return;
+
+ vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_clear_needs_reset, NULL, slot);
+}
+
static void __exit vfio_pci_cleanup(void)
{
pci_unregister_driver(&vfio_pci_driver);
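
Two related changes run through the vfio-pci hunks above: the per-device refcnt becomes a plain int, and every first-open/last-close transition, together with probe/remove and the bus-reset scan, now happens under the single driver_lock. That is what lets vfio_pci_try_bus_reset() walk the other devices on the slot or bus and trust their refcnt and needs_reset values without racing an open() or an unbind. A condensed sketch of the open/release pattern (function names are illustrative, not the driver's):

	static DEFINE_MUTEX(driver_lock);

	static int example_open(struct vfio_pci_device *vdev)
	{
		int ret = 0;

		mutex_lock(&driver_lock);
		if (!vdev->refcnt)		/* first user enables the device */
			ret = vfio_pci_enable(vdev);
		if (!ret)
			vdev->refcnt++;
		mutex_unlock(&driver_lock);
		return ret;
	}

	static void example_release(struct vfio_pci_device *vdev)
	{
		mutex_lock(&driver_lock);
		if (!--vdev->refcnt)		/* last user disables the device... */
			vfio_pci_disable(vdev);	/* ...which ends in vfio_pci_try_bus_reset() */
		mutex_unlock(&driver_lock);
	}
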
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9c6d5d0f3b0..671c17a6e6d 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -54,8 +54,9 @@ struct vfio_pci_device {
bool extended_caps;
bool bardirty;
bool has_vga;
+ bool needs_reset;
struct pci_saved_state *pci_saved_state;
- atomic_t refcnt;
+ int refcnt;
struct eventfd_ctx *err_trigger;
};
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4ce143..86dfceb9201 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,20 +9,27 @@
* published by the Free Software Foundation.
*/
+#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <asm/eeh.h>
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC "VFIO IOMMU SPAPR EEH"
+
/* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
{
- return eeh_dev_open(pdev);
+ eeh_dev_open(pdev);
}
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
{
eeh_dev_release(pdev);
}
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd, unsigned long arg)
@@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret;
}
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
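
With the new VFIO_SPAPR_EEH tristate, this file can be built as a module, which is why the open/release helpers gain EXPORT_SYMBOL_GPL and why vfio_spapr_pci_eeh_open() is reduced to a void function (its only caller in vfio_pci.c no longer checks a return value). For configurations without EEH support the callers still need to link, which implies no-op fallbacks in the header; a sketch of that arrangement (hedged; the real declarations live in include/linux/vfio.h):

	#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
	extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
	extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
	#else
	static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { }
	static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { }
	#endif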