From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Sep 2009 16:07:19 -0700 Subject: The DRBD driver Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 1132 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1132 insertions(+) create mode 100644 drivers/block/drbd/drbd_req.c (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c new file mode 100644 index 00000000000..0656cf1edd5 --- /dev/null +++ b/drivers/block/drbd/drbd_req.c @@ -0,0 +1,1132 @@ +/* + drbd_req.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_req.h" + + +/* Update disk stats at start of I/O request */ +static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +{ + const int rw = bio_data_dir(bio); + int cpu; + cpu = part_stat_lock(); + part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); + part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + mdev->vdisk->part0.in_flight[rw]++; +} + +/* Update disk stats when completing request upwards */ +static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +{ + int rw = bio_data_dir(req->master_bio); + unsigned long duration = jiffies - req->start_time; + int cpu; + cpu = part_stat_lock(); + part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); + part_round_stats(cpu, &mdev->vdisk->part0); + part_stat_unlock(); + mdev->vdisk->part0.in_flight[rw]--; +} + +static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) +{ + const unsigned long s = req->rq_state; + /* if it was a write, we may have to set the corresponding + * bit(s) out-of-sync first. If it had a local part, we need to + * release the reference to the activity log. */ + if (rw == WRITE) { + /* remove it from the transfer log. + * well, only if it had been there in the first + * place... if it had not (local only or conflicting + * and never sent), it should still be "empty" as + * initialized in drbd_req_new(), so we can list_del() it + * here unconditionally */ + list_del(&req->tl_requests); + /* Set out-of-sync unless both OK flags are set + * (local only or remote failed). + * Other places where we set out-of-sync: + * READ with local io-error */ + if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) + drbd_set_out_of_sync(mdev, req->sector, req->size); + + if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) + drbd_set_in_sync(mdev, req->sector, req->size); + + /* one might be tempted to move the drbd_al_complete_io + * to the local io completion callback drbd_endio_pri. + * but, if this was a mirror write, we may only + * drbd_al_complete_io after this is RQ_NET_DONE, + * otherwise the extent could be dropped from the al + * before it has actually been written on the peer. + * if we crash before our peer knows about the request, + * but after the extent has been dropped from the al, + * we would forget to resync the corresponding extent. + */ + if (s & RQ_LOCAL_MASK) { + if (get_ldev_if_state(mdev, D_FAILED)) { + drbd_al_complete_io(mdev, req->sector); + put_ldev(mdev); + } else if (__ratelimit(&drbd_ratelimit_state)) { + dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " + "but my Disk seems to have failed :(\n", + (unsigned long long) req->sector); + } + } + } + + /* if it was a local io error, we want to notify our + * peer about that, and see if we need to + * detach the disk and stuff. + * to avoid allocating some special work + * struct, reuse the request. */ + + /* THINK + * why do we do this not when we detect the error, + * but delay it until it is "done", i.e. possibly + * until the next barrier ack? */ + + if (rw == WRITE && + ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { + if (!(req->w.list.next == LIST_POISON1 || + list_empty(&req->w.list))) { + /* DEBUG ASSERT only; if this triggers, we + * probably corrupt the worker list here */ + dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); + dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); + } + req->w.cb = w_io_error; + drbd_queue_work(&mdev->data.work, &req->w); + /* drbd_req_free() is done in w_io_error */ + } else { + drbd_req_free(req); + } +} + +static void queue_barrier(struct drbd_conf *mdev) +{ + struct drbd_tl_epoch *b; + + /* We are within the req_lock. Once we queued the barrier for sending, + * we set the CREATE_BARRIER bit. It is cleared as soon as a new + * barrier/epoch object is added. This is the only place this bit is + * set. It indicates that the barrier for this epoch is already queued, + * and no new epoch has been created yet. */ + if (test_bit(CREATE_BARRIER, &mdev->flags)) + return; + + b = mdev->newest_tle; + b->w.cb = w_send_barrier; + /* inc_ap_pending done here, so we won't + * get imbalanced on connection loss. + * dec_ap_pending will be done in got_BarrierAck + * or (on connection loss) in tl_clear. */ + inc_ap_pending(mdev); + drbd_queue_work(&mdev->data.work, &b->w); + set_bit(CREATE_BARRIER, &mdev->flags); +} + +static void _about_to_complete_local_write(struct drbd_conf *mdev, + struct drbd_request *req) +{ + const unsigned long s = req->rq_state; + struct drbd_request *i; + struct drbd_epoch_entry *e; + struct hlist_node *n; + struct hlist_head *slot; + + /* before we can signal completion to the upper layers, + * we may need to close the current epoch */ + if (mdev->state.conn >= C_CONNECTED && + req->epoch == mdev->newest_tle->br_number) + queue_barrier(mdev); + + /* we need to do the conflict detection stuff, + * if we have the ee_hash (two_primaries) and + * this has been on the network */ + if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { + const sector_t sector = req->sector; + const int size = req->size; + + /* ASSERT: + * there must be no conflicting requests, since + * they must have been failed on the spot */ +#define OVERLAPS overlaps(sector, size, i->sector, i->size) + slot = tl_hash_slot(mdev, sector); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " + "other: %p %llus +%u\n", + req, (unsigned long long)sector, size, + i, (unsigned long long)i->sector, i->size); + } + } + + /* maybe "wake" those conflicting epoch entries + * that wait for this request to finish. + * + * currently, there can be only _one_ such ee + * (well, or some more, which would be pending + * P_DISCARD_ACK not yet sent by the asender...), + * since we block the receiver thread upon the + * first conflict detection, which will wait on + * misc_wait. maybe we want to assert that? + * + * anyways, if we found one, + * we just have to do a wake_up. */ +#undef OVERLAPS +#define OVERLAPS overlaps(sector, size, e->sector, e->size) + slot = ee_hash_slot(mdev, req->sector); + hlist_for_each_entry(e, n, slot, colision) { + if (OVERLAPS) { + wake_up(&mdev->misc_wait); + break; + } + } + } +#undef OVERLAPS +} + +void complete_master_bio(struct drbd_conf *mdev, + struct bio_and_error *m) +{ + trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL); + bio_endio(m->bio, m->error); + dec_ap_bio(mdev); +} + +/* Helper for __req_mod(). + * Set m->bio to the master bio, if it is fit to be completed, + * or leave it alone (it is initialized to NULL in __req_mod), + * if it has already been completed, or cannot be completed yet. + * If m->bio is set, the error status to be returned is placed in m->error. + */ +void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) +{ + const unsigned long s = req->rq_state; + struct drbd_conf *mdev = req->mdev; + /* only WRITES may end up here without a master bio (on barrier ack) */ + int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + + trace_drbd_req(req, nothing, "_req_may_be_done"); + + /* we must not complete the master bio, while it is + * still being processed by _drbd_send_zc_bio (drbd_send_dblock) + * not yet acknowledged by the peer + * not yet completed by the local io subsystem + * these flags may get cleared in any order by + * the worker, + * the receiver, + * the bio_endio completion callbacks. + */ + if (s & RQ_NET_QUEUED) + return; + if (s & RQ_NET_PENDING) + return; + if (s & RQ_LOCAL_PENDING) + return; + + if (req->master_bio) { + /* this is data_received (remote read) + * or protocol C P_WRITE_ACK + * or protocol B P_RECV_ACK + * or protocol A "handed_over_to_network" (SendAck) + * or canceled or failed, + * or killed from the transfer log due to connection loss. + */ + + /* + * figure out whether to report success or failure. + * + * report success when at least one of the operations succeeded. + * or, to put the other way, + * only report failure, when both operations failed. + * + * what to do about the failures is handled elsewhere. + * what we need to do here is just: complete the master_bio. + * + * local completion error, if any, has been stored as ERR_PTR + * in private_bio within drbd_endio_pri. + */ + int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); + int error = PTR_ERR(req->private_bio); + + /* remove the request from the conflict detection + * respective block_id verification hash */ + if (!hlist_unhashed(&req->colision)) + hlist_del(&req->colision); + else + D_ASSERT((s & RQ_NET_MASK) == 0); + + /* for writes we need to do some extra housekeeping */ + if (rw == WRITE) + _about_to_complete_local_write(mdev, req); + + /* Update disk stats */ + _drbd_end_io_acct(mdev, req); + + m->error = ok ? 0 : (error ?: -EIO); + m->bio = req->master_bio; + req->master_bio = NULL; + } + + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { + /* this is disconnected (local only) operation, + * or protocol C P_WRITE_ACK, + * or protocol A or B P_BARRIER_ACK, + * or killed from the transfer log due to connection loss. */ + _req_is_done(mdev, req, rw); + } + /* else: network part and not DONE yet. that is + * protocol A or B, barrier ack still pending... */ +} + +/* + * checks whether there was an overlapping request + * or ee already registered. + * + * if so, return 1, in which case this request is completed on the spot, + * without ever being submitted or send. + * + * return 0 if it is ok to submit this request. + * + * NOTE: + * paranoia: assume something above us is broken, and issues different write + * requests for the same block simultaneously... + * + * To ensure these won't be reordered differently on both nodes, resulting in + * diverging data sets, we discard the later one(s). Not that this is supposed + * to happen, but this is the rationale why we also have to check for + * conflicting requests with local origin, and why we have to do so regardless + * of whether we allowed multiple primaries. + * + * BTW, in case we only have one primary, the ee_hash is empty anyways, and the + * second hlist_for_each_entry becomes a noop. This is even simpler than to + * grab a reference on the net_conf, and check for the two_primaries flag... + */ +static int _req_conflicts(struct drbd_request *req) +{ + struct drbd_conf *mdev = req->mdev; + const sector_t sector = req->sector; + const int size = req->size; + struct drbd_request *i; + struct drbd_epoch_entry *e; + struct hlist_node *n; + struct hlist_head *slot; + + D_ASSERT(hlist_unhashed(&req->colision)); + + if (!get_net_conf(mdev)) + return 0; + + /* BUG_ON */ + ERR_IF (mdev->tl_hash_s == 0) + goto out_no_conflict; + BUG_ON(mdev->tl_hash == NULL); + +#define OVERLAPS overlaps(i->sector, i->size, sector, size) + slot = tl_hash_slot(mdev, sector); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "%s[%u] Concurrent local write detected! " + "[DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)i->sector, i->size); + goto out_conflict; + } + } + + if (mdev->ee_hash_s) { + /* now, check for overlapping requests with remote origin */ + BUG_ON(mdev->ee_hash == NULL); +#undef OVERLAPS +#define OVERLAPS overlaps(e->sector, e->size, sector, size) + slot = ee_hash_slot(mdev, sector); + hlist_for_each_entry(e, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "%s[%u] Concurrent remote write detected!" + " [DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)e->sector, e->size); + goto out_conflict; + } + } + } +#undef OVERLAPS + +out_no_conflict: + /* this is like it should be, and what we expected. + * our users do behave after all... */ + put_net_conf(mdev); + return 0; + +out_conflict: + put_net_conf(mdev); + return 1; +} + +/* obviously this could be coded as many single functions + * instead of one huge switch, + * or by putting the code directly in the respective locations + * (as it has been before). + * + * but having it this way + * enforces that it is all in this one place, where it is easier to audit, + * it makes it obvious that whatever "event" "happens" to a request should + * happen "atomically" within the req_lock, + * and it enforces that we have to think in a very structured manner + * about the "events" that may happen to a request during its life time ... + */ +void __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct bio_and_error *m) +{ + struct drbd_conf *mdev = req->mdev; + m->bio = NULL; + + trace_drbd_req(req, what, NULL); + + switch (what) { + default: + dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); + break; + + /* does not happen... + * initialization done in drbd_req_new + case created: + break; + */ + + case to_be_send: /* via network */ + /* reached via drbd_make_request_common + * and from w_read_retry_remote */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + break; + + case to_be_submitted: /* locally */ + /* reached via drbd_make_request_common */ + D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); + req->rq_state |= RQ_LOCAL_PENDING; + break; + + case completed_ok: + if (bio_data_dir(req->master_bio) == WRITE) + mdev->writ_cnt += req->size>>9; + else + mdev->read_cnt += req->size>>9; + + req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); + req->rq_state &= ~RQ_LOCAL_PENDING; + + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case write_completed_with_error: + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* and now: check how to handle local io error. */ + __drbd_chk_io_error(mdev, FALSE); + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case read_ahead_completed_with_error: + /* it is legal to fail READA */ + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case read_completed_with_error: + drbd_set_out_of_sync(mdev, req->sector, req->size); + + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* _req_mod(req,to_be_send); oops, recursion... */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + + __drbd_chk_io_error(mdev, FALSE); + put_ldev(mdev); + /* NOTE: if we have no connection, + * or know the peer has no good data either, + * then we don't actually need to "queue_for_net_read", + * but we do so anyways, since the drbd_io_error() + * and the potential state change to "Diskless" + * needs to be done from process context */ + + /* fall through: _req_mod(req,queue_for_net_read); */ + + case queue_for_net_read: + /* READ or READA, and + * no local disk, + * or target area marked as invalid, + * or just got an io-error. */ + /* from drbd_make_request_common + * or from bio_endio during read io-error recovery */ + + /* so we can verify the handle in the answer packet + * corresponding hlist_del is in _req_may_be_done() */ + hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + + set_bit(UNPLUG_REMOTE, &mdev->flags); /* why? */ + + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = (req->rq_state & RQ_LOCAL_MASK) + ? w_read_retry_remote + : w_send_read_req; + drbd_queue_work(&mdev->data.work, &req->w); + break; + + case queue_for_net_write: + /* assert something? */ + /* from drbd_make_request_common only */ + + hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + /* corresponding hlist_del is in _req_may_be_done() */ + + /* NOTE + * In case the req ended up on the transfer log before being + * queued on the worker, it could lead to this request being + * missed during cleanup after connection loss. + * So we have to do both operations here, + * within the same lock that protects the transfer log. + * + * _req_add_to_epoch(req); this has to be after the + * _maybe_start_new_epoch(req); which happened in + * drbd_make_request_common, because we now may set the bit + * again ourselves to close the current epoch. + * + * Add req to the (now) current epoch (barrier). */ + + /* see drbd_make_request_common, + * just after it grabs the req_lock */ + D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + + req->epoch = mdev->newest_tle->br_number; + list_add_tail(&req->tl_requests, + &mdev->newest_tle->requests); + + /* increment size of current epoch */ + mdev->newest_tle->n_req++; + + /* queue work item to send data */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = w_send_dblock; + drbd_queue_work(&mdev->data.work, &req->w); + + /* close the epoch, in case it outgrew the limit */ + if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) + queue_barrier(mdev); + + break; + + case send_canceled: + /* treat it the same */ + case send_failed: + /* real cleanup will be done from tl_clear. just update flags + * so it is no longer marked as on the worker queue */ + req->rq_state &= ~RQ_NET_QUEUED; + /* if we did it right, tl_clear should be scheduled only after + * this, so this should not be necessary! */ + _req_may_be_done(req, m); + break; + + case handed_over_to_network: + /* assert something? */ + if (bio_data_dir(req->master_bio) == WRITE && + mdev->net_conf->wire_protocol == DRBD_PROT_A) { + /* this is what is dangerous about protocol A: + * pretend it was successfully written on the peer. */ + if (req->rq_state & RQ_NET_PENDING) { + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + req->rq_state |= RQ_NET_OK; + } /* else: neg-ack was faster... */ + /* it is still not yet RQ_NET_DONE until the + * corresponding epoch barrier got acked as well, + * so we know what to dirty on connection loss */ + } + req->rq_state &= ~RQ_NET_QUEUED; + req->rq_state |= RQ_NET_SENT; + /* because _drbd_send_zc_bio could sleep, and may want to + * dereference the bio even after the "write_acked_by_peer" and + * "completed_ok" events came in, once we return from + * _drbd_send_zc_bio (drbd_send_dblock), we have to check + * whether it is done already, and end it. */ + _req_may_be_done(req, m); + break; + + case connection_lost_while_pending: + /* transfer log cleanup after connection loss */ + /* assert something? */ + if (req->rq_state & RQ_NET_PENDING) + dec_ap_pending(mdev); + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); + req->rq_state |= RQ_NET_DONE; + /* if it is still queued, we may not complete it here. + * it will be canceled soon. */ + if (!(req->rq_state & RQ_NET_QUEUED)) + _req_may_be_done(req, m); + break; + + case write_acked_by_peer_and_sis: + req->rq_state |= RQ_NET_SIS; + case conflict_discarded_by_peer: + /* for discarded conflicting writes of multiple primaries, + * there is no need to keep anything in the tl, potential + * node crashes are covered by the activity log. */ + if (what == conflict_discarded_by_peer) + dev_alert(DEV, "Got DiscardAck packet %llus +%u!" + " DRBD is not a random data generator!\n", + (unsigned long long)req->sector, req->size); + req->rq_state |= RQ_NET_DONE; + /* fall through */ + case write_acked_by_peer: + /* protocol C; successfully written on peer. + * Nothing to do here. + * We want to keep the tl in place for all protocols, to cater + * for volatile write-back caches on lower level devices. + * + * A barrier request is expected to have forced all prior + * requests onto stable storage, so completion of a barrier + * request could set NET_DONE right here, and not wait for the + * P_BARRIER_ACK, but that is an unnecessary optimization. */ + + /* this makes it effectively the same as for: */ + case recv_acked_by_peer: + /* protocol B; pretends to be successfully written on peer. + * see also notes above in handed_over_to_network about + * protocol != C */ + req->rq_state |= RQ_NET_OK; + D_ASSERT(req->rq_state & RQ_NET_PENDING); + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + _req_may_be_done(req, m); + break; + + case neg_acked: + /* assert something? */ + if (req->rq_state & RQ_NET_PENDING) + dec_ap_pending(mdev); + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); + + req->rq_state |= RQ_NET_DONE; + _req_may_be_done(req, m); + /* else: done by handed_over_to_network */ + break; + + case barrier_acked: + if (req->rq_state & RQ_NET_PENDING) { + /* barrier came in before all requests have been acked. + * this is bad, because if the connection is lost now, + * we won't be able to clean them up... */ + dev_err(DEV, "FIXME (barrier_acked but pending)\n"); + trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); + list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + } + D_ASSERT(req->rq_state & RQ_NET_SENT); + req->rq_state |= RQ_NET_DONE; + _req_may_be_done(req, m); + break; + + case data_received: + D_ASSERT(req->rq_state & RQ_NET_PENDING); + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + _req_may_be_done(req, m); + break; + }; +} + +/* we may do a local read if: + * - we are consistent (of course), + * - or we are generally inconsistent, + * BUT we are still/already IN SYNC for this area. + * since size may be bigger than BM_BLOCK_SIZE, + * we may need to check several bits. + */ +static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +{ + unsigned long sbnr, ebnr; + sector_t esector, nr_sectors; + + if (mdev->state.disk == D_UP_TO_DATE) + return 1; + if (mdev->state.disk >= D_OUTDATED) + return 0; + if (mdev->state.disk < D_INCONSISTENT) + return 0; + /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + D_ASSERT(sector < nr_sectors); + D_ASSERT(esector < nr_sectors); + + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + + return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); +} + +static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) +{ + const int rw = bio_rw(bio); + const int size = bio->bi_size; + const sector_t sector = bio->bi_sector; + struct drbd_tl_epoch *b = NULL; + struct drbd_request *req; + int local, remote; + int err = -EIO; + + /* allocate outside of all locks; */ + req = drbd_req_new(mdev, bio); + if (!req) { + dec_ap_bio(mdev); + /* only pass the error to the upper layers. + * if user cannot handle io errors, that's not our business. */ + dev_err(DEV, "could not kmalloc() req\n"); + bio_endio(bio, -ENOMEM); + return 0; + } + + trace_drbd_bio(mdev, "Rq", bio, 0, req); + + local = get_ldev(mdev); + if (!local) { + bio_put(req->private_bio); /* or we get a bio leak */ + req->private_bio = NULL; + } + if (rw == WRITE) { + remote = 1; + } else { + /* READ || READA */ + if (local) { + if (!drbd_may_do_local_read(mdev, sector, size)) { + /* we could kick the syncer to + * sync this extent asap, wait for + * it, then continue locally. + * Or just issue the request remotely. + */ + local = 0; + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + } + remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; + } + + /* If we have a disk, but a READA request is mapped to remote, + * we are R_PRIMARY, D_INCONSISTENT, SyncTarget. + * Just fail that READA request right here. + * + * THINK: maybe fail all READA when not local? + * or make this configurable... + * if network is slow, READA won't do any good. + */ + if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) { + err = -EWOULDBLOCK; + goto fail_and_free_req; + } + + /* For WRITES going to the local disk, grab a reference on the target + * extent. This waits for any resync activity in the corresponding + * resync extent to finish, and, if necessary, pulls in the target + * extent into the activity log, which involves further disk io because + * of transactional on-disk meta data updates. */ + if (rw == WRITE && local) + drbd_al_begin_io(mdev, sector); + + remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); + + if (!(local || remote)) { + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + goto fail_free_complete; + } + + /* For WRITE request, we have to make sure that we have an + * unused_spare_tle, in case we need to start a new epoch. + * I try to be smart and avoid to pre-allocate always "just in case", + * but there is a race between testing the bit and pointer outside the + * spinlock, and grabbing the spinlock. + * if we lost that race, we retry. */ + if (rw == WRITE && remote && + mdev->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { +allocate_barrier: + b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); + if (!b) { + dev_err(DEV, "Failed to alloc barrier.\n"); + err = -ENOMEM; + goto fail_free_complete; + } + } + + /* GOOD, everything prepared, grab the spin_lock */ + spin_lock_irq(&mdev->req_lock); + + if (remote) { + remote = (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); + if (!remote) + dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); + if (!(local || remote)) { + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + spin_unlock_irq(&mdev->req_lock); + goto fail_free_complete; + } + } + + if (b && mdev->unused_spare_tle == NULL) { + mdev->unused_spare_tle = b; + b = NULL; + } + if (rw == WRITE && remote && + mdev->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { + /* someone closed the current epoch + * while we were grabbing the spinlock */ + spin_unlock_irq(&mdev->req_lock); + goto allocate_barrier; + } + + + /* Update disk stats */ + _drbd_start_io_acct(mdev, req, bio); + + /* _maybe_start_new_epoch(mdev); + * If we need to generate a write barrier packet, we have to add the + * new epoch (barrier) object, and queue the barrier packet for sending, + * and queue the req's data after it _within the same lock_, otherwise + * we have race conditions were the reorder domains could be mixed up. + * + * Even read requests may start a new epoch and queue the corresponding + * barrier packet. To get the write ordering right, we only have to + * make sure that, if this is a write request and it triggered a + * barrier packet, this request is queued within the same spinlock. */ + if (remote && mdev->unused_spare_tle && + test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + _tl_add_barrier(mdev, mdev->unused_spare_tle); + mdev->unused_spare_tle = NULL; + } else { + D_ASSERT(!(remote && rw == WRITE && + test_bit(CREATE_BARRIER, &mdev->flags))); + } + + /* NOTE + * Actually, 'local' may be wrong here already, since we may have failed + * to write to the meta data, and may become wrong anytime because of + * local io-error for some other request, which would lead to us + * "detaching" the local disk. + * + * 'remote' may become wrong any time because the network could fail. + * + * This is a harmless race condition, though, since it is handled + * correctly at the appropriate places; so it just defers the failure + * of the respective operation. + */ + + /* mark them early for readability. + * this just sets some state flags. */ + if (remote) + _req_mod(req, to_be_send); + if (local) + _req_mod(req, to_be_submitted); + + /* check this request on the collision detection hash tables. + * if we have a conflict, just complete it here. + * THINK do we want to check reads, too? (I don't think so...) */ + if (rw == WRITE && _req_conflicts(req)) { + /* this is a conflicting request. + * even though it may have been only _partially_ + * overlapping with one of the currently pending requests, + * without even submitting or sending it, we will + * pretend that it was successfully served right now. + */ + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + drbd_al_complete_io(mdev, req->sector); + put_ldev(mdev); + local = 0; + } + if (remote) + dec_ap_pending(mdev); + _drbd_end_io_acct(mdev, req); + /* THINK: do we want to fail it (-EIO), or pretend success? */ + bio_endio(req->master_bio, 0); + req->master_bio = NULL; + dec_ap_bio(mdev); + drbd_req_free(req); + remote = 0; + } + + /* NOTE remote first: to get the concurrent write detection right, + * we must register the request before start of local IO. */ + if (remote) { + /* either WRITE and C_CONNECTED, + * or READ, and no local disk, + * or READ, but not in sync. + */ + _req_mod(req, (rw == WRITE) + ? queue_for_net_write + : queue_for_net_read); + } + spin_unlock_irq(&mdev->req_lock); + kfree(b); /* if someone else has beaten us to it... */ + + if (local) { + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + + trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); + + if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(req->private_bio, -EIO); + else + generic_make_request(req->private_bio); + } + + /* we need to plug ALWAYS since we possibly need to kick lo_dev. + * we plug after submit, so we won't miss an unplug event */ + drbd_plug_device(mdev); + + return 0; + +fail_free_complete: + if (rw == WRITE && local) + drbd_al_complete_io(mdev, sector); +fail_and_free_req: + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + bio_endio(bio, err); + drbd_req_free(req); + dec_ap_bio(mdev); + kfree(b); + + return 0; +} + +/* helper function for drbd_make_request + * if we can determine just by the mdev (state) that this request will fail, + * return 1 + * otherwise return 0 + */ +static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) +{ + /* Unconfigured */ + if (mdev->state.conn == C_DISCONNECTING && + mdev->state.disk == D_DISKLESS) + return 1; + + if (mdev->state.role != R_PRIMARY && + (!allow_oos || is_write)) { + if (__ratelimit(&drbd_ratelimit_state)) { + dev_err(DEV, "Process %s[%u] tried to %s; " + "since we are not in Primary state, " + "we cannot allow this\n", + current->comm, current->pid, + is_write ? "WRITE" : "READ"); + } + return 1; + } + + /* + * Paranoia: we might have been primary, but sync target, or + * even diskless, then lost the connection. + * This should have been handled (panic? suspend?) somewhere + * else. But maybe it was not, so check again here. + * Caution: as long as we do not have a read/write lock on mdev, + * to serialize state changes, this is racy, since we may lose + * the connection *after* we test for the cstate. + */ + if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); + return 1; + } + + return 0; +} + +int drbd_make_request_26(struct request_queue *q, struct bio *bio) +{ + unsigned int s_enr, e_enr; + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + + if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { + bio_endio(bio, -EPERM); + return 0; + } + + /* Reject barrier requests if we know the underlying device does + * not support them. + * XXX: Need to get this info from peer as well some how so we + * XXX: reject if EITHER side/data/metadata area does not support them. + * + * because of those XXX, this is not yet enabled, + * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. + */ + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { + /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ + bio_endio(bio, -EOPNOTSUPP); + return 0; + } + + /* + * what we "blindly" assume: + */ + D_ASSERT(bio->bi_size > 0); + D_ASSERT((bio->bi_size & 0x1ff) == 0); + D_ASSERT(bio->bi_idx == 0); + + /* to make some things easier, force alignment of requests within the + * granularity of our hash tables */ + s_enr = bio->bi_sector >> HT_SHIFT; + e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; + + if (likely(s_enr == e_enr)) { + inc_ap_bio(mdev, 1); + return drbd_make_request_common(mdev, bio); + } + + /* can this bio be split generically? + * Maybe add our own split-arbitrary-bios function. */ + if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { + /* rather error out here than BUG in bio_split */ + dev_err(DEV, "bio would need to, but cannot, be split: " + "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", + bio->bi_vcnt, bio->bi_idx, bio->bi_size, + (unsigned long long)bio->bi_sector); + bio_endio(bio, -EINVAL); + } else { + /* This bio crosses some boundary, so we have to split it. */ + struct bio_pair *bp; + /* works for the "do not cross hash slot boundaries" case + * e.g. sector 262269, size 4096 + * s_enr = 262269 >> 6 = 4097 + * e_enr = (262269+8-1) >> 6 = 4098 + * HT_SHIFT = 6 + * sps = 64, mask = 63 + * first_sectors = 64 - (262269 & 63) = 3 + */ + const sector_t sect = bio->bi_sector; + const int sps = 1 << HT_SHIFT; /* sectors per slot */ + const int mask = sps - 1; + const sector_t first_sectors = sps - (sect & mask); + bp = bio_split(bio, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + bio_split_pool, +#endif + first_sectors); + + /* we need to get a "reference count" (ap_bio_cnt) + * to avoid races with the disconnect/reconnect/suspend code. + * In case we need to split the bio here, we need to get two references + * atomically, otherwise we might deadlock when trying to submit the + * second one! */ + inc_ap_bio(mdev, 2); + + D_ASSERT(e_enr == s_enr + 1); + + drbd_make_request_common(mdev, &bp->bio1); + drbd_make_request_common(mdev, &bp->bio2); + bio_pair_release(bp); + } + return 0; +} + +/* This is called by bio_add_page(). With this function we reduce + * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs + * units (was AL_EXTENTs). + * + * we do the calculation within the lower 32bit of the byte offsets, + * since we don't care for actual offset, but only check whether it + * would cross "activity log extent" boundaries. + * + * As long as the BIO is empty we have to allow at least one bvec, + * regardless of size and offset. so the resulting bio may still + * cross extent boundaries. those are dealt with (bio_split) in + * drbd_make_request_26. + */ +int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) +{ + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + unsigned int bio_offset = + (unsigned int)bvm->bi_sector << 9; /* 32 bit */ + unsigned int bio_size = bvm->bi_size; + int limit, backing_limit; + + limit = DRBD_MAX_SEGMENT_SIZE + - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); + if (limit < 0) + limit = 0; + if (bio_size == 0) { + if (limit <= bvec->bv_len) + limit = bvec->bv_len; + } else if (limit && get_ldev(mdev)) { + struct request_queue * const b = + mdev->ldev->backing_bdev->bd_disk->queue; + if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) { + backing_limit = b->merge_bvec_fn(b, bvm, bvec); + limit = min(limit, backing_limit); + } + put_ldev(mdev); + } + return limit; +} -- cgit v1.2.3-70-g09d2 From ab8fafc2e1ecc0090f2c78902d3b992eec8b11f8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 28 Sep 2009 10:28:01 +0200 Subject: dropping unneeded include autoconf.h It is force-included on the gcc command line since at least 2.6.15. Explicit include lines seem to break compilation now in certain configurations. Signed-off-by: Lars Ellenberg Signed-off-by: Kamalesh Babulal Acked-by: Sam Ravnborg --- drivers/block/drbd/drbd_main.c | 1 - drivers/block/drbd/drbd_nl.c | 1 - drivers/block/drbd/drbd_proc.c | 1 - drivers/block/drbd/drbd_receiver.c | 1 - drivers/block/drbd/drbd_req.c | 1 - drivers/block/drbd/drbd_req.h | 1 - drivers/block/drbd/drbd_worker.c | 1 - 7 files changed, 7 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index edf0b8031e6..80273f21a4a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -26,7 +26,6 @@ */ -#include #include #include #include diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1927acefe23..cfde31002df 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -23,7 +23,6 @@ */ -#include #include #include #include diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 98fcb7450c7..bdd0b4943b1 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -23,7 +23,6 @@ */ -#include #include #include diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 63686c4d85c..2f81821c2e0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -23,7 +23,6 @@ */ -#include #include #include diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 0656cf1edd5..1aaa397669a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -23,7 +23,6 @@ */ -#include #include #include diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index d37ab57f120..f22c1bc8ec7 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -25,7 +25,6 @@ #ifndef _DRBD_REQ_H #define _DRBD_REQ_H -#include #include #include diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 212e9545e63..34a4b3ef6c0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -23,7 +23,6 @@ */ -#include #include #include #include -- cgit v1.2.3-70-g09d2 From 6a0afdf58d40200abd0c717261d1bc4c49195c2f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Oct 2009 09:04:14 +0200 Subject: drbd: remove tracing bits They should be reimplemented in the current scheme. Signed-off-by: Jens Axboe --- drivers/block/drbd/Kconfig | 11 - drivers/block/drbd/Makefile | 3 - drivers/block/drbd/drbd_actlog.c | 62 +-- drivers/block/drbd/drbd_int.h | 7 - drivers/block/drbd/drbd_main.c | 36 +- drivers/block/drbd/drbd_nl.c | 9 - drivers/block/drbd/drbd_receiver.c | 30 +- drivers/block/drbd/drbd_req.c | 11 - drivers/block/drbd/drbd_tracing.c | 752 ------------------------------------- drivers/block/drbd/drbd_tracing.h | 87 ----- drivers/block/drbd/drbd_worker.c | 16 - 11 files changed, 3 insertions(+), 1021 deletions(-) delete mode 100644 drivers/block/drbd/drbd_tracing.c delete mode 100644 drivers/block/drbd/drbd_tracing.h (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 4e6f90f487c..f4acd04ebee 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -38,17 +38,6 @@ config BLK_DEV_DRBD If unsure, say N. -config DRBD_TRACE - tristate "DRBD tracing" - depends on BLK_DEV_DRBD - select TRACEPOINTS - default n - help - - Say Y here if you want to be able to trace various events in DRBD. - - If unsure, say N. - config DRBD_FAULT_INJECTION bool "DRBD fault injection" depends on BLK_DEV_DRBD diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 7d86ef8a8b4..0d3f337ff5f 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -2,7 +2,4 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o -drbd_trace-y := drbd_tracing.o - obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o -obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 74b4835d310..17956ff6a08 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -26,7 +26,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_wrappers.h" /* We maintain a trivial check sum in our on disk activity log. @@ -66,17 +65,6 @@ struct drbd_atodb_wait { int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); -/* The actual tracepoint needs to have constant number of known arguments... - */ -void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - trace__drbd_resync(mdev, level, fmt, ap); - va_end(ap); -} - static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, @@ -105,8 +93,6 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - trace_drbd_bio(mdev, "Md", bio, 0, NULL); - if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else @@ -236,8 +222,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - trace_drbd_actlog(mdev, sector, "al_begin_io"); - wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); if (al_ext->lc_number != enr) { @@ -270,8 +254,6 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) struct lc_element *extent; unsigned long flags; - trace_drbd_actlog(mdev, sector, "al_complete_io"); - spin_lock_irqsave(&mdev->al_lock, flags); extent = lc_find(mdev->act_log, enr); @@ -967,10 +949,6 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - if (sbnr > ebnr) return; @@ -1045,10 +1023,6 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - /* ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. */ spin_lock_irqsave(&mdev->al_lock, flags); @@ -1143,10 +1117,6 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i, sig; - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", - (unsigned long long)sector, enr); - sig = wait_event_interruptible(mdev->al_wait, (bm_ext = _bme_get(mdev, enr))); if (sig) @@ -1192,9 +1162,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n", - (unsigned long long)sector); - spin_lock_irq(&mdev->al_lock); if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { /* in case you have very heavy scattered io, it may @@ -1210,11 +1177,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * the lc_put here... * we also have to wake_up */ - - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping %u, apparently got 'synced' by application io\n", - mdev->resync_wenr); - e = lc_find(mdev->resync, mdev->resync_wenr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { @@ -1242,21 +1204,14 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * but then could not set BME_LOCKED, * so we tried again. * drop the extra reference. */ - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping extra reference on %u\n", enr); - bm_ext->lce.refcnt--; D_ASSERT(bm_ext->lce.refcnt > 0); } goto check_al; } else { /* do we rather want to try later? */ - if (mdev->resync_locked > mdev->resync->nr_elements-3) { - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "resync_locked = %u!\n", mdev->resync_locked); - + if (mdev->resync_locked > mdev->resync->nr_elements-3) goto try_again; - } /* Do or do not. There is no try. -- Yoda */ e = lc_get(mdev->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; @@ -1281,8 +1236,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) goto check_al; } check_al: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr); - for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { if (unlikely(al_enr+i == mdev->act_log->new_number)) goto try_again; @@ -1296,7 +1249,6 @@ proceed: return 0; try_again: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr); if (bm_ext) mdev->resync_wenr = enr; spin_unlock_irq(&mdev->al_lock); @@ -1310,10 +1262,6 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; unsigned long flags; - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", - (long long)sector, enr); - spin_lock_irqsave(&mdev->al_lock, flags); e = lc_find(mdev->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; @@ -1348,8 +1296,6 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) */ void drbd_rs_cancel_all(struct drbd_conf *mdev) { - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ @@ -1375,8 +1321,6 @@ int drbd_rs_del_all(struct drbd_conf *mdev) struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { @@ -1429,10 +1373,6 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, - "drbd_rs_failed_io: sector=%llus, size=%u\n", - (unsigned long long)sector, size); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8da602e010b..4e6255991e5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -135,8 +135,6 @@ enum { DRBD_FAULT_MAX, }; -extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); - #ifdef CONFIG_DRBD_FAULT_INJECTION extern unsigned int _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); @@ -712,11 +710,6 @@ enum epoch_event { EV_GOT_BARRIER_NR, EV_BARRIER_DONE, EV_BECAME_LAST, - EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ - EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ - EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ - EV_TRACE_ALLOC, - EV_TRACE_FREE, EV_CLEANUP = 32, /* used as flag */ }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 80273f21a4a..11d8ff6016a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -53,7 +53,6 @@ #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ #include "drbd_vli.h" @@ -80,18 +79,6 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); -DEFINE_TRACE(drbd_unplug); -DEFINE_TRACE(drbd_uuid); -DEFINE_TRACE(drbd_ee); -DEFINE_TRACE(drbd_packet); -DEFINE_TRACE(drbd_md_io); -DEFINE_TRACE(drbd_epoch); -DEFINE_TRACE(drbd_netlink); -DEFINE_TRACE(drbd_actlog); -DEFINE_TRACE(drbd_bio); -DEFINE_TRACE(_drbd_resync); -DEFINE_TRACE(drbd_req); - MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); @@ -1576,7 +1563,6 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size-sizeof(struct p_header)); - trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); sent = drbd_send(mdev, sock, h, size, msg_flags); ok = (sent == size); @@ -1628,8 +1614,6 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, if (!drbd_get_data_sock(mdev)) return 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); - ok = (sizeof(h) == drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); ok = ok && (size == @@ -2359,7 +2343,6 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dp_flags |= DP_MAY_SET_IN_SYNC; p.dp_flags = cpu_to_be32(dp_flags); - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); @@ -2410,7 +2393,6 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE); if (ok && dgs) { @@ -2546,8 +2528,6 @@ static void drbd_unplug_fn(struct request_queue *q) { struct drbd_conf *mdev = q->queuedata; - trace_drbd_unplug(mdev, "got unplugged"); - /* unplug FIRST */ spin_lock_irq(q->queue_lock); blk_remove_plug(q); @@ -3252,8 +3232,6 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - trace_drbd_md_io(mdev, WRITE, mdev->ldev); - mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); memset(buffer, 0, 512); @@ -3308,8 +3286,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - trace_drbd_md_io(mdev, READ, bdev); - mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3388,11 +3364,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; - for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; - - trace_drbd_uuid(mdev, i+1); - } } void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) @@ -3407,7 +3380,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) } mdev->ldev->md.uuid[idx] = val; - trace_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3417,7 +3389,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; - trace_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } @@ -3436,7 +3407,6 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) dev_info(DEV, "Creating new current UUID\n"); D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; - trace_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); _drbd_uuid_set(mdev, UI_CURRENT, val); @@ -3451,8 +3421,6 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; mdev->ldev->md.uuid[UI_BITMAP] = 0; - trace_drbd_uuid(mdev, UI_HISTORY_START); - trace_drbd_uuid(mdev, UI_BITMAP); } else { if (mdev->ldev->md.uuid[UI_BITMAP]) dev_warn(DEV, "bm UUID already set"); @@ -3460,7 +3428,6 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) mdev->ldev->md.uuid[UI_BITMAP] = val; mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); - trace_drbd_uuid(mdev, UI_BITMAP); } drbd_md_mark_dirty(mdev); } @@ -3727,7 +3694,6 @@ const char *drbd_buildtag(void) module_init(drbd_init) module_exit(drbd_cleanup) -/* For drbd_tracing: */ EXPORT_SYMBOL(drbd_conn_str); EXPORT_SYMBOL(drbd_role_str); EXPORT_SYMBOL(drbd_disk_str); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cfde31002df..73c55ccb629 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -33,7 +33,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_wrappers.h" #include #include @@ -2024,8 +2023,6 @@ static void drbd_connector_callback(struct cn_msg *req) goto fail; } - trace_drbd_netlink(req, 1); - if (nlp->packet_type >= P_nl_after_last_packet) { retcode = ERR_PACKET_NR; goto fail; @@ -2063,7 +2060,6 @@ static void drbd_connector_callback(struct cn_msg *req) cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; cn_reply->flags = 0; - trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); @@ -2157,7 +2153,6 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2190,7 +2185,6 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2262,7 +2256,6 @@ void drbd_bcast_ee(struct drbd_conf *mdev, reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); kfree(cn_reply); } @@ -2302,7 +2295,6 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2356,7 +2348,6 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; reply->ret_code = ret_code; - trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2f81821c2e0..360baf60f57 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -47,7 +47,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" #include "drbd_vli.h" @@ -350,8 +349,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->epoch = NULL; e->flags = 0; - trace_drbd_ee(mdev, e, "allocated"); - return e; fail2: @@ -366,7 +363,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct bio *bio = e->private_bio; - trace_drbd_ee(mdev, e, "freed"); drbd_pp_free_bio_pages(mdev, bio); bio_put(bio); D_ASSERT(hlist_unhashed(&e->colision)); @@ -420,7 +416,6 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) * all ignore the last argument. */ list_for_each_entry_safe(e, t, &work_list, w.list) { - trace_drbd_ee(mdev, e, "process_done_ee"); /* list_del not necessary, next/prev members not touched */ ok = e->w.cb(mdev, &e->w, !ok) && ok; drbd_free_ee(mdev, e); @@ -1021,8 +1016,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, break; } - trace_drbd_epoch(mdev, epoch, ev); - if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && @@ -1054,7 +1047,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); mdev->epochs--; - trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE); kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1080,7 +1072,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct flush_work *fw; fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { - trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH); fw->w.cb = w_flush; fw->epoch = epoch; drbd_queue_work(&mdev->data.work, &fw->w); @@ -1251,7 +1242,6 @@ static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) list_add(&epoch->list, &mdev->current_epoch->list); mdev->current_epoch = epoch; mdev->epochs++; - trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC); } else { /* The current_epoch got recycled while we allocated this one... */ kfree(epoch); @@ -1458,8 +1448,6 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); - trace_drbd_ee(mdev, e, "submitting for (rs)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); /* accounting done in endio */ @@ -1721,16 +1709,13 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); - trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } @@ -1905,8 +1890,6 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) } e->private_bio->bi_rw = rw; - trace_drbd_ee(mdev, e, "submitting for (data)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); /* accounting done in endio */ @@ -2065,8 +2048,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) inc_unacked(mdev); - trace_drbd_ee(mdev, e, "submitting for read"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, fault_type, e->private_bio); maybe_kick_lo(mdev); @@ -3543,9 +3524,6 @@ static void drbdd(struct drbd_conf *mdev) drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); break; } - - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); } } @@ -3825,9 +3803,6 @@ static int drbd_do_handshake(struct drbd_conf *mdev) return 0; } - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); - p->protocol_min = be32_to_cpu(p->protocol_min); p->protocol_max = be32_to_cpu(p->protocol_max); if (p->protocol_max == 0) @@ -4420,14 +4395,11 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) { - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + ERR_IF(len != expect-sizeof(struct p_header)) goto reconnect; - } } if (received == expect) { D_ASSERT(cmd != NULL); - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); if (!cmd->process(mdev, h)) goto reconnect; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1aaa397669a..3678d3d66c6 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -28,7 +28,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" @@ -218,7 +217,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m) { - trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL); bio_endio(m->bio, m->error); dec_ap_bio(mdev); } @@ -236,8 +234,6 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* only WRITES may end up here without a master bio (on barrier ack) */ int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; - trace_drbd_req(req, nothing, "_req_may_be_done"); - /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) * not yet acknowledged by the peer @@ -415,8 +411,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, struct drbd_conf *mdev = req->mdev; m->bio = NULL; - trace_drbd_req(req, what, NULL); - switch (what) { default: dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); @@ -666,7 +660,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (barrier_acked but pending)\n"); - trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); list_move(&req->tl_requests, &mdev->out_of_sequence_requests); } D_ASSERT(req->rq_state & RQ_NET_SENT); @@ -736,8 +729,6 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) return 0; } - trace_drbd_bio(mdev, "Rq", bio, 0, req); - local = get_ldev(mdev); if (!local) { bio_put(req->private_bio); /* or we get a bio leak */ @@ -928,8 +919,6 @@ allocate_barrier: if (local) { req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); - if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA)) diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c deleted file mode 100644 index d18d4f7b4be..00000000000 --- a/drivers/block/drbd/drbd_tracing.c +++ /dev/null @@ -1,752 +0,0 @@ -/* - drbd_tracing.c - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#include -#include -#include -#include "drbd_int.h" -#include "drbd_tracing.h" -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg"); -MODULE_DESCRIPTION("DRBD tracepoint probes"); -MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace see drbd_tracing.c"); -MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)"); -MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)"); - -unsigned int trace_mask = 0; /* Bitmap of events to trace */ -int trace_level; /* Current trace level */ -int trace_devs; /* Bitmap of devices to trace */ - -module_param(trace_mask, uint, 0444); -module_param(trace_level, int, 0644); -module_param(trace_devs, int, 0644); - -enum { - TRACE_PACKET = 0x0001, - TRACE_RQ = 0x0002, - TRACE_UUID = 0x0004, - TRACE_RESYNC = 0x0008, - TRACE_EE = 0x0010, - TRACE_UNPLUG = 0x0020, - TRACE_NL = 0x0040, - TRACE_AL_EXT = 0x0080, - TRACE_INT_RQ = 0x0100, - TRACE_MD_IO = 0x0200, - TRACE_EPOCH = 0x0400, -}; - -/* Buffer printing support - * dbg_print_flags: used for Flags arg to drbd_print_buffer - * - DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being output. If clear, - * each line starts with the offset from the beginning - * of the buffer. */ -enum dbg_print_flags { - DBGPRINT_BUFFADDR = 0x0001, -}; - -/* Macro stuff */ -static char *nl_packet_name(int packet_type) -{ -/* Generate packet type strings */ -#define NL_PACKET(name, number, fields) \ - [P_ ## name] = # name, -#define NL_INTEGER Argh! -#define NL_BIT Argh! -#define NL_INT64 Argh! -#define NL_STRING Argh! - - static char *nl_tag_name[P_nl_after_last_packet] = { -#include "linux/drbd_nl.h" - }; - - return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? - nl_tag_name[packet_type] : "*Unknown*"; -} -/* /Macro stuff */ - -static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level) -{ - return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs); -} - -static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt)); -} - -static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) -{ - static char *uuid_str[UI_EXTENDED_SIZE] = { - [UI_CURRENT] = "CURRENT", - [UI_BITMAP] = "BITMAP", - [UI_HISTORY_START] = "HISTORY_START", - [UI_HISTORY_END] = "HISTORY_END", - [UI_SIZE] = "SIZE", - [UI_FLAGS] = "FLAGS", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - if (index >= UI_EXTENDED_SIZE) { - dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); - return; - } - - dev_info(DEV, " uuid[%s] now %016llX\n", - uuid_str[index], - (unsigned long long)mdev->ldev->md.uuid[index]); -} - -static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, - struct drbd_backing_dev *bdev) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, " %s metadata superblock now\n", - rw == READ ? "Reading" : "Writing"); -} - -static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n", - msg, (unsigned long long)e->sector, e->size, e); -} - -static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, - enum epoch_event ev) -{ - static char *epoch_event_str[] = { - [EV_PUT] = "put", - [EV_GOT_BARRIER_NR] = "got_barrier_nr", - [EV_BARRIER_DONE] = "barrier_done", - [EV_BECAME_LAST] = "became_last", - [EV_TRACE_FLUSH] = "issuing_flush", - [EV_TRACE_ADD_BARRIER] = "added_barrier", - [EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - ev &= ~EV_CLEANUP; - - switch (ev) { - case EV_TRACE_ALLOC: - dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); - break; - case EV_TRACE_FREE: - dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - mdev->epochs); - break; - default: - dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - atomic_read(&epoch->active), - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', - test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', - epoch_event_str[ev]); - } -} - -static void probe_drbd_netlink(void *data, int is_req) -{ - struct cn_msg *msg = data; - - if (is_req) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->drbd_minor, - nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } else { - struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->minor, - nlp->packet_type == P_nl_after_last_packet ? - "Empty-Reply" : nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } -} - -static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) -{ - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n", - msg, (unsigned long long) sector, enr, - (int)BM_SECT_TO_EXT(sector)); -} - -/** - * drbd_print_buffer() - Hexdump arbitrary binary data into a buffer - * @prefix: String is output at the beginning of each line output. - * @flags: Currently only defined flag: DBGPRINT_BUFFADDR; if set, each - * line starts with the virtual address of the line being - * output. If clear, each line starts with the offset from the - * beginning of the buffer. - * @size: Indicates the size of each entry in the buffer. Supported - * values are sizeof(char), sizeof(short) and sizeof(int) - * @buffer: Start address of buffer - * @buffer_va: Virtual address of start of buffer (normally the same - * as Buffer, but having it separate allows it to hold - * file address for example) - * @length: length of buffer - */ -static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, - const void *buffer, const void *buffer_va, - unsigned int length) - -#define LINE_SIZE 16 -#define LINE_ENTRIES (int)(LINE_SIZE/size) -{ - const unsigned char *pstart; - const unsigned char *pstart_va; - const unsigned char *pend; - char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; - char *pbytes = bytes_str, *pascii = ascii_str; - int offset = 0; - long sizemask; - int field_width; - int index; - const unsigned char *pend_str; - const unsigned char *p; - int count; - - /* verify size parameter */ - if (size != sizeof(char) && - size != sizeof(short) && - size != sizeof(int)) { - printk(KERN_DEBUG "drbd_print_buffer: " - "ERROR invalid size %d\n", size); - return; - } - - sizemask = size-1; - field_width = size*2; - - /* Adjust start/end to be on appropriate boundary for size */ - buffer = (const char *)((long)buffer & ~sizemask); - pend = (const unsigned char *) - (((long)buffer + length + sizemask) & ~sizemask); - - if (flags & DBGPRINT_BUFFADDR) { - /* Move start back to nearest multiple of line size, - * if printing address. This results in nicely formatted output - * with addresses being on line size (16) byte boundaries */ - pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); - } else { - pstart = (const unsigned char *)buffer; - } - - /* Set value of start VA to print if addresses asked for */ - pstart_va = (const unsigned char *)buffer_va - - ((const unsigned char *)buffer-pstart); - - /* Calculate end position to nicely align right hand side */ - pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); - - /* Init strings */ - *pbytes = *pascii = '\0'; - - /* Start at beginning of first line */ - p = pstart; - count = 0; - - while (p < pend_str) { - if (p < (const unsigned char *)buffer || p >= pend) { - /* Before start of buffer or after end- print spaces */ - pbytes += sprintf(pbytes, "%*c ", field_width, ' '); - pascii += sprintf(pascii, "%*c", size, ' '); - p += size; - } else { - /* Add hex and ascii to strings */ - int val; - switch (size) { - default: - case 1: - val = *(unsigned char *)p; - break; - case 2: - val = *(unsigned short *)p; - break; - case 4: - val = *(unsigned int *)p; - break; - } - - pbytes += sprintf(pbytes, "%0*x ", field_width, val); - - for (index = size; index; index--) { - *pascii++ = isprint(*p) ? *p : '.'; - p++; - } - } - - count++; - - if (count == LINE_ENTRIES || p >= pend_str) { - /* Null terminate and print record */ - *pascii = '\0'; - printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", - prefix, - (flags & DBGPRINT_BUFFADDR) - ? (long)pstart_va:(long)offset, - LINE_ENTRIES*(field_width+1), bytes_str, - LINE_SIZE, ascii_str); - - /* Move onto next line */ - pstart_va += (p-pstart); - pstart = p; - count = 0; - offset += LINE_SIZE; - - /* Re-init strings */ - pbytes = bytes_str; - pascii = ascii_str; - *pbytes = *pascii = '\0'; - } - } -} - -static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args) -{ - char str[256]; - - if (!is_mdev_trace(mdev, level)) - return; - - if (vsnprintf(str, 256, fmt, args) >= 256) - str[255] = 0; - - printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)), - dev_name(disk_to_dev(mdev->vdisk)), str); -} - -static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r) -{ -#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD) -#define SECTOR_FORMAT "%Lx" -#else -#define SECTOR_FORMAT "%lx" -#endif -#define SECTOR_SHIFT 9 - - unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); - char *faddr = (char *)(lowaddr); - char rb[sizeof(void *)*2+6] = { 0, }; - struct bio_vec *bvec; - int segno; - - const int rw = bio->bi_rw; - const int biorw = (rw & (RW_MASK|RWA_MASK)); - const int biobarrier = (rw & (1<>>", - pfx, - biorw == WRITE ? "Write" : "Read", - biobarrier ? " : B" : "", - biosync ? " : S" : "", - bio, - rb, - complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "", - bio->bi_sector << SECTOR_SHIFT, - bio->bi_size); - - if (trace_level >= TRACE_LVL_METRICS && - ((biorw == WRITE) ^ complete)) { - printk(KERN_DEBUG " ind page offset length\n"); - __bio_for_each_segment(bvec, bio, segno, 0) { - printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, - bvec->bv_page, bvec->bv_offset, bvec->bv_len); - - if (trace_level >= TRACE_LVL_ALL) { - char *bvec_buf; - unsigned long flags; - - bvec_buf = bvec_kmap_irq(bvec, &flags); - - drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, - bvec_buf, - faddr, - (bvec->bv_len <= 0x80) - ? bvec->bv_len : 0x80); - - bvec_kunmap_irq(bvec_buf, &flags); - - if (bvec->bv_len > 0x40) - printk(KERN_DEBUG " ....\n"); - - faddr += bvec->bv_len; - } - } - } -} - -static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg) -{ - static const char *rq_event_names[] = { - [created] = "created", - [to_be_send] = "to_be_send", - [to_be_submitted] = "to_be_submitted", - [queue_for_net_write] = "queue_for_net_write", - [queue_for_net_read] = "queue_for_net_read", - [send_canceled] = "send_canceled", - [send_failed] = "send_failed", - [handed_over_to_network] = "handed_over_to_network", - [connection_lost_while_pending] = - "connection_lost_while_pending", - [recv_acked_by_peer] = "recv_acked_by_peer", - [write_acked_by_peer] = "write_acked_by_peer", - [neg_acked] = "neg_acked", - [conflict_discarded_by_peer] = "conflict_discarded_by_peer", - [barrier_acked] = "barrier_acked", - [data_received] = "data_received", - [read_completed_with_error] = "read_completed_with_error", - [read_ahead_completed_with_error] = "reada_completed_with_error", - [write_completed_with_error] = "write_completed_with_error", - [completed_ok] = "completed_ok", - }; - - struct drbd_conf *mdev = req->mdev; - - const int rw = (req->master_bio == NULL || - bio_data_dir(req->master_bio) == WRITE) ? - 'W' : 'R'; - const unsigned long s = req->rq_state; - - if (what != nothing) { - dev_info(DEV, "__req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); - } else { - dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", - msg, req, rw, - s & RQ_LOCAL_PENDING ? 'p' : '-', - s & RQ_LOCAL_COMPLETED ? 'c' : '-', - s & RQ_LOCAL_OK ? 'o' : '-', - s & RQ_NET_PENDING ? 'p' : '-', - s & RQ_NET_QUEUED ? 'q' : '-', - s & RQ_NET_SENT ? 's' : '-', - s & RQ_NET_DONE ? 'd' : '-', - s & RQ_NET_OK ? 'o' : '-', - req->epoch, - (unsigned long long)req->sector, - req->size, - drbd_conn_str(mdev->state.conn)); - } -} - - -#define drbd_peer_str drbd_role_str -#define drbd_pdsk_str drbd_disk_str - -#define PSM(A) \ -do { \ - if (mask.A) { \ - int i = snprintf(p, len, " " #A "( %s )", \ - drbd_##A##_str(val.A)); \ - if (i >= len) \ - return op; \ - p += i; \ - len -= i; \ - } \ -} while (0) - -static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) -{ - char *op = p; - *p = '\0'; - PSM(role); - PSM(peer); - PSM(conn); - PSM(disk); - PSM(pdsk); - - return op; -} - -#define INFOP(fmt, args...) \ -do { \ - if (trace_level >= TRACE_LVL_ALL) { \ - dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ - file, line, current->comm, current->pid, \ - sockname, recv ? "<<<" : ">>>" , \ - ## args); \ - } else { \ - dev_info(DEV, "%s %s " fmt, sockname, \ - recv ? "<<<" : ">>>" , \ - ## args); \ - } \ -} while (0) - -static char *_dump_block_id(u64 block_id, char *buff) -{ - if (is_syncer_block_id(block_id)) - strcpy(buff, "SyncerId"); - else - sprintf(buff, "%llx", (unsigned long long)block_id); - - return buff; -} - -static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line) -{ - char *sockname = sock == mdev->meta.socket ? "meta" : "data"; - int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command); - char tmp[300]; - union drbd_state m, v; - - switch (cmd) { - case P_HAND_SHAKE: - INFOP("%s (protocol %u-%u)\n", cmdname(cmd), - be32_to_cpu(p->handshake.protocol_min), - be32_to_cpu(p->handshake.protocol_max)); - break; - - case P_BITMAP: /* don't report this */ - case P_COMPRESSED_BITMAP: /* don't report this */ - break; - - case P_DATA: - INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp), - be32_to_cpu(p->data.seq_num), - be32_to_cpu(p->data.dp_flags) - ); - break; - - case P_DATA_REPLY: - case P_RS_DATA_REPLY: - INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp) - ); - break; - - case P_RECV_ACK: - case P_WRITE_ACK: - case P_RS_WRITE_ACK: - case P_DISCARD_ACK: - case P_NEG_ACK: - case P_NEG_RS_DREPLY: - INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", - cmdname(cmd), - (long long)be64_to_cpu(p->block_ack.sector), - be32_to_cpu(p->block_ack.blksize), - _dump_block_id(p->block_ack.block_id, tmp), - be32_to_cpu(p->block_ack.seq_num) - ); - break; - - case P_DATA_REQUEST: - case P_RS_DATA_REQUEST: - INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), - (long long)be64_to_cpu(p->block_req.sector), - be32_to_cpu(p->block_req.blksize), - _dump_block_id(p->block_req.block_id, tmp) - ); - break; - - case P_BARRIER: - case P_BARRIER_ACK: - INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); - break; - - case P_SYNC_PARAM: - case P_SYNC_PARAM89: - INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", - cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), - p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); - break; - - case P_UUIDS: - INFOP("%s Curr:%016llX, Bitmap:%016llX, " - "HisSt:%016llX, HisEnd:%016llX\n", - cmdname(cmd), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); - break; - - case P_SIZES: - INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " - "max bio %x, q order %x)\n", - cmdname(cmd), - (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), - be32_to_cpu(p->sizes.max_segment_size), - be32_to_cpu(p->sizes.queue_order_type)); - break; - - case P_STATE: - v.i = be32_to_cpu(p->state.state); - m.i = 0xffffffff; - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); - break; - - case P_STATE_CHG_REQ: - m.i = be32_to_cpu(p->req_state.mask); - v.i = be32_to_cpu(p->req_state.val); - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); - break; - - case P_STATE_CHG_REPLY: - INFOP("%s (ret %x)\n", cmdname(cmd), - be32_to_cpu(p->req_state_reply.retcode)); - break; - - case P_PING: - case P_PING_ACK: - /* - * Dont trace pings at summary level - */ - if (trace_level < TRACE_LVL_ALL) - break; - /* fall through... */ - default: - INFOP("%s (%u)\n", cmdname(cmd), cmd); - break; - } -} - - -static int __init drbd_trace_init(void) -{ - int ret; - - if (trace_mask & TRACE_UNPLUG) { - ret = register_trace_drbd_unplug(probe_drbd_unplug); - WARN_ON(ret); - } - if (trace_mask & TRACE_UUID) { - ret = register_trace_drbd_uuid(probe_drbd_uuid); - WARN_ON(ret); - } - if (trace_mask & TRACE_EE) { - ret = register_trace_drbd_ee(probe_drbd_ee); - WARN_ON(ret); - } - if (trace_mask & TRACE_PACKET) { - ret = register_trace_drbd_packet(probe_drbd_packet); - WARN_ON(ret); - } - if (trace_mask & TRACE_MD_IO) { - ret = register_trace_drbd_md_io(probe_drbd_md_io); - WARN_ON(ret); - } - if (trace_mask & TRACE_EPOCH) { - ret = register_trace_drbd_epoch(probe_drbd_epoch); - WARN_ON(ret); - } - if (trace_mask & TRACE_NL) { - ret = register_trace_drbd_netlink(probe_drbd_netlink); - WARN_ON(ret); - } - if (trace_mask & TRACE_AL_EXT) { - ret = register_trace_drbd_actlog(probe_drbd_actlog); - WARN_ON(ret); - } - if (trace_mask & TRACE_RQ) { - ret = register_trace_drbd_bio(probe_drbd_bio); - WARN_ON(ret); - } - if (trace_mask & TRACE_INT_RQ) { - ret = register_trace_drbd_req(probe_drbd_req); - WARN_ON(ret); - } - if (trace_mask & TRACE_RESYNC) { - ret = register_trace__drbd_resync(probe_drbd_resync); - WARN_ON(ret); - } - return 0; -} - -module_init(drbd_trace_init); - -static void __exit drbd_trace_exit(void) -{ - if (trace_mask & TRACE_UNPLUG) - unregister_trace_drbd_unplug(probe_drbd_unplug); - if (trace_mask & TRACE_UUID) - unregister_trace_drbd_uuid(probe_drbd_uuid); - if (trace_mask & TRACE_EE) - unregister_trace_drbd_ee(probe_drbd_ee); - if (trace_mask & TRACE_PACKET) - unregister_trace_drbd_packet(probe_drbd_packet); - if (trace_mask & TRACE_MD_IO) - unregister_trace_drbd_md_io(probe_drbd_md_io); - if (trace_mask & TRACE_EPOCH) - unregister_trace_drbd_epoch(probe_drbd_epoch); - if (trace_mask & TRACE_NL) - unregister_trace_drbd_netlink(probe_drbd_netlink); - if (trace_mask & TRACE_AL_EXT) - unregister_trace_drbd_actlog(probe_drbd_actlog); - if (trace_mask & TRACE_RQ) - unregister_trace_drbd_bio(probe_drbd_bio); - if (trace_mask & TRACE_INT_RQ) - unregister_trace_drbd_req(probe_drbd_req); - if (trace_mask & TRACE_RESYNC) - unregister_trace__drbd_resync(probe_drbd_resync); - - tracepoint_synchronize_unregister(); -} - -module_exit(drbd_trace_exit); diff --git a/drivers/block/drbd/drbd_tracing.h b/drivers/block/drbd/drbd_tracing.h deleted file mode 100644 index c4531a137f6..00000000000 --- a/drivers/block/drbd/drbd_tracing.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - drbd_tracing.h - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#ifndef DRBD_TRACING_H -#define DRBD_TRACING_H - -#include -#include "drbd_int.h" -#include "drbd_req.h" - -enum { - TRACE_LVL_ALWAYS = 0, - TRACE_LVL_SUMMARY, - TRACE_LVL_METRICS, - TRACE_LVL_ALL, - TRACE_LVL_MAX -}; - -DECLARE_TRACE(drbd_unplug, - TP_PROTO(struct drbd_conf *mdev, char* msg), - TP_ARGS(mdev, msg)); - -DECLARE_TRACE(drbd_uuid, - TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index), - TP_ARGS(mdev, index)); - -DECLARE_TRACE(drbd_ee, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg), - TP_ARGS(mdev, e, msg)); - -DECLARE_TRACE(drbd_md_io, - TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev), - TP_ARGS(mdev, rw, bdev)); - -DECLARE_TRACE(drbd_epoch, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev), - TP_ARGS(mdev, epoch, ev)); - -DECLARE_TRACE(drbd_netlink, - TP_PROTO(void *data, int is_req), - TP_ARGS(data, is_req)); - -DECLARE_TRACE(drbd_actlog, - TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg), - TP_ARGS(mdev, sector, msg)); - -DECLARE_TRACE(drbd_bio, - TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r), - TP_ARGS(mdev, pfx, bio, complete, r)); - -DECLARE_TRACE(drbd_req, - TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg), - TP_ARGS(req, what, msg)); - -DECLARE_TRACE(drbd_packet, - TP_PROTO(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line), - TP_ARGS(mdev, sock, recv, p, file, line)); - -DECLARE_TRACE(_drbd_resync, - TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args), - TP_ARGS(mdev, level, fmt, args)); - -#endif diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 34a4b3ef6c0..ed8796f1112 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -40,7 +40,6 @@ #include "drbd_int.h" #include "drbd_req.h" -#include "drbd_tracing.h" #define SLEEP_TIME (HZ/10) @@ -82,8 +81,6 @@ void drbd_md_io_complete(struct bio *bio, int error) md_io = (struct drbd_md_io *)bio->bi_private; md_io->error = error; - trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL); - complete(&md_io->event); } @@ -114,8 +111,6 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->read_cnt += e->size >> 9; list_del(&e->w.list); @@ -126,8 +121,6 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) drbd_chk_io_error(mdev, error, FALSE); drbd_queue_work(&mdev->data.work, &e->w); put_ldev(mdev); - - trace_drbd_ee(mdev, e, "read completed"); } /* writes on behalf of the partner, or resync writes, @@ -176,8 +169,6 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->writ_cnt += e->size >> 9; is_syncer_req = is_syncer_block_id(e->block_id); @@ -192,8 +183,6 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - trace_drbd_ee(mdev, e, "write completed"); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. * done from "drbd_process_done_ee" within the appropriate w.cb @@ -244,8 +233,6 @@ void drbd_endio_pri(struct bio *bio, int error) error = -EIO; } - trace_drbd_bio(mdev, "Pri", bio, 1, NULL); - /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) @@ -1321,9 +1308,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) return; } - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", - side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ drbd_rs_cancel_all(mdev); -- cgit v1.2.3-70-g09d2 From 25d2d4edfa509b69fe4832094b8a07e634363ba3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Oct 2009 09:31:59 +0200 Subject: drbd: fixup for reverted dual in_flight patch Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3678d3d66c6..d3426ff405b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -40,7 +40,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); part_stat_unlock(); - mdev->vdisk->part0.in_flight[rw]++; + mdev->vdisk->part0.in_flight++; } /* Update disk stats when completing request upwards */ @@ -53,7 +53,7 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); part_round_stats(cpu, &mdev->vdisk->part0); part_stat_unlock(); - mdev->vdisk->part0.in_flight[rw]--; + mdev->vdisk->part0.in_flight--; } static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) -- cgit v1.2.3-70-g09d2 From a870a3a485ddf7c0dec549269ed71d169556d61c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 28 Oct 2009 09:30:27 +0100 Subject: drbd: fix in_flight rw indexing Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d3426ff405b..3678d3d66c6 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -40,7 +40,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); part_stat_unlock(); - mdev->vdisk->part0.in_flight++; + mdev->vdisk->part0.in_flight[rw]++; } /* Update disk stats when completing request upwards */ @@ -53,7 +53,7 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); part_round_stats(cpu, &mdev->vdisk->part0); part_stat_unlock(); - mdev->vdisk->part0.in_flight--; + mdev->vdisk->part0.in_flight[rw]--; } static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) -- cgit v1.2.3-70-g09d2 From 83c38830b04d4e369b9a41acbc562c0422f2f2f2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 3 Nov 2009 02:22:06 +0100 Subject: drbd: performance - don't lose unplug events Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3678d3d66c6..d09aac4a84e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -505,7 +505,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, * corresponding hlist_del is in _req_may_be_done() */ hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); - set_bit(UNPLUG_REMOTE, &mdev->flags); /* why? */ + set_bit(UNPLUG_REMOTE, &mdev->flags); D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; @@ -536,6 +536,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, * * Add req to the (now) current epoch (barrier). */ + /* otherwise we may lose an unplug, which may cause some remote + * io-scheduler timeout to expire, increasing maximum latency, + * hurting performance. */ + set_bit(UNPLUG_REMOTE, &mdev->flags); + /* see drbd_make_request_common, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); -- cgit v1.2.3-70-g09d2 From 753c89130c52b96e66e5ceff19bd1336de9a5ce8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 18 Nov 2009 15:52:51 +0100 Subject: drbd_req.c: use part_[inc|dec]_in_flight() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d09aac4a84e..de81ab7b462 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -39,8 +39,8 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req cpu = part_stat_lock(); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_inc_in_flight(&mdev->vdisk->part0, rw); part_stat_unlock(); - mdev->vdisk->part0.in_flight[rw]++; } /* Update disk stats when completing request upwards */ @@ -52,8 +52,8 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) cpu = part_stat_lock(); part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); part_round_stats(cpu, &mdev->vdisk->part0); + part_dec_in_flight(&mdev->vdisk->part0, rw); part_stat_unlock(); - mdev->vdisk->part0.in_flight[rw]--; } static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) -- cgit v1.2.3-70-g09d2