From f5ee37bd31678d6cb2313631f203794b5c25e862 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@redhat.com>
Date: Thu, 9 Oct 2014 17:06:01 +0400
Subject: rbd: use a single workqueue for all devices

Using one queue per device doesn't make much sense given that our
workfn processes "devices" and not "requests".  Switch to a single
workqueue for all devices.

Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 drivers/block/rbd.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 0a54c588e43..be8d44af6ae 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -342,7 +342,6 @@ struct rbd_device {
 
 	struct list_head	rq_queue;	/* incoming rq queue */
 	spinlock_t		lock;		/* queue, flags, open_count */
-	struct workqueue_struct	*rq_wq;
 	struct work_struct	rq_work;
 
 	struct rbd_image_header	header;
@@ -402,6 +401,8 @@ static struct kmem_cache	*rbd_segment_name_cache;
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
+static struct workqueue_struct *rbd_wq;
+
 /*
  * Default to false for now, as single-major requires >= 0.75 version of
  * userspace rbd utility.
@@ -3452,7 +3453,7 @@ static void rbd_request_fn(struct request_queue *q)
 	}
 
 	if (queued)
-		queue_work(rbd_dev->rq_wq, &rbd_dev->rq_work);
+		queue_work(rbd_wq, &rbd_dev->rq_work);
 }
 
 /*
@@ -5242,16 +5243,9 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
 	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
-	rbd_dev->rq_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
-					 rbd_dev->disk->disk_name);
-	if (!rbd_dev->rq_wq) {
-		ret = -ENOMEM;
-		goto err_out_mapping;
-	}
-
 	ret = rbd_bus_add_dev(rbd_dev);
 	if (ret)
-		goto err_out_workqueue;
+		goto err_out_mapping;
 
 	/* Everything's ready.  Announce the disk to the world. */
 
@@ -5263,9 +5257,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 
 	return ret;
 
-err_out_workqueue:
-	destroy_workqueue(rbd_dev->rq_wq);
-	rbd_dev->rq_wq = NULL;
 err_out_mapping:
 	rbd_dev_mapping_clear(rbd_dev);
 err_out_disk:
@@ -5512,7 +5503,6 @@ static void rbd_dev_device_release(struct device *dev)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	destroy_workqueue(rbd_dev->rq_wq);
 	rbd_free_disk(rbd_dev);
 	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 	rbd_dev_mapping_clear(rbd_dev);
@@ -5716,11 +5706,21 @@ static int __init rbd_init(void)
 	if (rc)
 		return rc;
 
+	/*
+	 * The number of active work items is limited by the number of
+	 * rbd devices, so leave @max_active at default.
+	 */
+	rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
+	if (!rbd_wq) {
+		rc = -ENOMEM;
+		goto err_out_slab;
+	}
+
 	if (single_major) {
 		rbd_major = register_blkdev(0, RBD_DRV_NAME);
 		if (rbd_major < 0) {
 			rc = rbd_major;
-			goto err_out_slab;
+			goto err_out_wq;
 		}
 	}
 
@@ -5738,6 +5738,8 @@ static int __init rbd_init(void)
 err_out_blkdev:
 	if (single_major)
 		unregister_blkdev(rbd_major, RBD_DRV_NAME);
+err_out_wq:
+	destroy_workqueue(rbd_wq);
 err_out_slab:
 	rbd_slab_exit();
 	return rc;
@@ -5749,6 +5751,7 @@ static void __exit rbd_exit(void)
 	rbd_sysfs_cleanup();
 	if (single_major)
 		unregister_blkdev(rbd_major, RBD_DRV_NAME);
+	destroy_workqueue(rbd_wq);
 	rbd_slab_exit();
 }
 
-- 
cgit v1.2.3-70-g09d2


From 89baaa570ab0b476db09408d209578cfed700e9f Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 16 Oct 2014 01:50:19 -0500
Subject: libceph: use memalloc flags for net IO

This patch has ceph's lib code use the memalloc flags.

If the VM layer needs to write data out to free up memory to handle new
allocation requests, the block layer must be able to make forward progress.
To handle that requirement we use structs like mempools to reserve memory for
objects like bios and requests.

The problem is when we send/receive block layer requests over the network
layer, net skb allocations can fail and the system can lock up.
To solve this, the memalloc related flags were added. NBD, iSCSI
and NFS uses these flags to tell the network/vm layer that it should
use memory reserves to fullfill allcation requests for structs like
skbs.

I am running ceph in a bunch of VMs in my laptop, so this patch was
not tested very harshly.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Reviewed-by: Ilya Dryomov <idryomov@redhat.com>
---
 net/ceph/messenger.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 559c9f619c2..8d1653caffd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 			       IPPROTO_TCP, &sock);
 	if (ret)
 		return ret;
-	sock->sk->sk_allocation = GFP_NOFS;
+	sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
 
 #ifdef CONFIG_LOCKDEP
 	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -509,6 +509,9 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 
 		return ret;
 	}
+
+	sk_set_memalloc(sock->sk);
+
 	con->sock = sock;
 	return 0;
 }
@@ -2769,8 +2772,11 @@ static void con_work(struct work_struct *work)
 {
 	struct ceph_connection *con = container_of(work, struct ceph_connection,
 						   work.work);
+	unsigned long pflags = current->flags;
 	bool fault;
 
+	current->flags |= PF_MEMALLOC;
+
 	mutex_lock(&con->mutex);
 	while (true) {
 		int ret;
@@ -2824,6 +2830,8 @@ static void con_work(struct work_struct *work)
 		con_fault_finish(con);
 
 	con->ops->put(con);
+
+	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a8d4205623ae965e36c68629db306ca0695a2771 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Oct 2014 09:17:24 +0200
Subject: rbd: Fix error recovery in rbd_obj_read_sync()

When we fail to allocate page vector in rbd_obj_read_sync() we just
basically ignore the problem and continue which will result in an oops
later. Fix the problem by returning proper error.

CC: Yehuda Sadeh <yehuda@inktank.com>
CC: Sage Weil <sage@inktank.com>
CC: ceph-devel@vger.kernel.org
CC: stable@vger.kernel.org
Coverity-id: 1226882
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index be8d44af6ae..27b71a0b72d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3533,7 +3533,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 	page_count = (u32) calc_pages_for(offset, length);
 	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
 	if (IS_ERR(pages))
-		ret = PTR_ERR(pages);
+		return PTR_ERR(pages);
 
 	ret = -ENOMEM;
 	obj_request = rbd_obj_request_create(object_name, offset, length,
-- 
cgit v1.2.3-70-g09d2


From e9226d7c9f1d83278d78675d51acc07e1a78cb27 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@redhat.com>
Date: Wed, 22 Oct 2014 18:15:37 +0400
Subject: libceph: eliminate unnecessary allocation in process_one_ticket()

Commit c27a3e4d667f ("libceph: do not hard code max auth ticket len")
while fixing a buffer overlow tried to keep the same as much of the
surrounding code as possible and introduced an unnecessary kmalloc() in
the unencrypted ticket path.  It is likely to fail on huge tickets, so
get rid of it.

Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/auth_x.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index de6662b14e1..7e38b729696 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -149,6 +149,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	struct ceph_crypto_key old_key;
 	void *ticket_buf = NULL;
 	void *tp, *tpend;
+	void **ptp;
 	struct ceph_timespec new_validity;
 	struct ceph_crypto_key new_session_key;
 	struct ceph_buffer *new_ticket_blob;
@@ -208,25 +209,19 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 			goto out;
 		}
 		tp = ticket_buf;
-		dlen = ceph_decode_32(&tp);
+		ptp = &tp;
+		tpend = *ptp + dlen;
 	} else {
 		/* unencrypted */
-		ceph_decode_32_safe(p, end, dlen, bad);
-		ticket_buf = kmalloc(dlen, GFP_NOFS);
-		if (!ticket_buf) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		tp = ticket_buf;
-		ceph_decode_need(p, end, dlen, bad);
-		ceph_decode_copy(p, ticket_buf, dlen);
+		ptp = p;
+		tpend = end;
 	}
-	tpend = tp + dlen;
+	ceph_decode_32_safe(ptp, tpend, dlen, bad);
 	dout(" ticket blob is %d bytes\n", dlen);
-	ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
-	blob_struct_v = ceph_decode_8(&tp);
-	new_secret_id = ceph_decode_64(&tp);
-	ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+	ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad);
+	blob_struct_v = ceph_decode_8(ptp);
+	new_secret_id = ceph_decode_64(ptp);
+	ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend);
 	if (ret)
 		goto out;
 
-- 
cgit v1.2.3-70-g09d2