From f5ee37bd31678d6cb2313631f203794b5c25e862 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Oct 2014 17:06:01 +0400 Subject: rbd: use a single workqueue for all devices Using one queue per device doesn't make much sense given that our workfn processes "devices" and not "requests". Switch to a single workqueue for all devices. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- drivers/block/rbd.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 0a54c588e43..be8d44af6ae 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -342,7 +342,6 @@ struct rbd_device { struct list_head rq_queue; /* incoming rq queue */ spinlock_t lock; /* queue, flags, open_count */ - struct workqueue_struct *rq_wq; struct work_struct rq_work; struct rbd_image_header header; @@ -402,6 +401,8 @@ static struct kmem_cache *rbd_segment_name_cache; static int rbd_major; static DEFINE_IDA(rbd_dev_id_ida); +static struct workqueue_struct *rbd_wq; + /* * Default to false for now, as single-major requires >= 0.75 version of * userspace rbd utility. @@ -3452,7 +3453,7 @@ static void rbd_request_fn(struct request_queue *q) } if (queued) - queue_work(rbd_dev->rq_wq, &rbd_dev->rq_work); + queue_work(rbd_wq, &rbd_dev->rq_work); } /* @@ -5242,16 +5243,9 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); - rbd_dev->rq_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, - rbd_dev->disk->disk_name); - if (!rbd_dev->rq_wq) { - ret = -ENOMEM; - goto err_out_mapping; - } - ret = rbd_bus_add_dev(rbd_dev); if (ret) - goto err_out_workqueue; + goto err_out_mapping; /* Everything's ready. Announce the disk to the world. */ @@ -5263,9 +5257,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) return ret; -err_out_workqueue: - destroy_workqueue(rbd_dev->rq_wq); - rbd_dev->rq_wq = NULL; err_out_mapping: rbd_dev_mapping_clear(rbd_dev); err_out_disk: @@ -5512,7 +5503,6 @@ static void rbd_dev_device_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - destroy_workqueue(rbd_dev->rq_wq); rbd_free_disk(rbd_dev); clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_dev_mapping_clear(rbd_dev); @@ -5716,11 +5706,21 @@ static int __init rbd_init(void) if (rc) return rc; + /* + * The number of active work items is limited by the number of + * rbd devices, so leave @max_active at default. + */ + rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); + if (!rbd_wq) { + rc = -ENOMEM; + goto err_out_slab; + } + if (single_major) { rbd_major = register_blkdev(0, RBD_DRV_NAME); if (rbd_major < 0) { rc = rbd_major; - goto err_out_slab; + goto err_out_wq; } } @@ -5738,6 +5738,8 @@ static int __init rbd_init(void) err_out_blkdev: if (single_major) unregister_blkdev(rbd_major, RBD_DRV_NAME); +err_out_wq: + destroy_workqueue(rbd_wq); err_out_slab: rbd_slab_exit(); return rc; @@ -5749,6 +5751,7 @@ static void __exit rbd_exit(void) rbd_sysfs_cleanup(); if (single_major) unregister_blkdev(rbd_major, RBD_DRV_NAME); + destroy_workqueue(rbd_wq); rbd_slab_exit(); } -- cgit v1.2.3-70-g09d2 From 89baaa570ab0b476db09408d209578cfed700e9f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 16 Oct 2014 01:50:19 -0500 Subject: libceph: use memalloc flags for net IO This patch has ceph's lib code use the memalloc flags. If the VM layer needs to write data out to free up memory to handle new allocation requests, the block layer must be able to make forward progress. To handle that requirement we use structs like mempools to reserve memory for objects like bios and requests. The problem is when we send/receive block layer requests over the network layer, net skb allocations can fail and the system can lock up. To solve this, the memalloc related flags were added. NBD, iSCSI and NFS uses these flags to tell the network/vm layer that it should use memory reserves to fullfill allcation requests for structs like skbs. I am running ceph in a bunch of VMs in my laptop, so this patch was not tested very harshly. Signed-off-by: Mike Christie Reviewed-by: Ilya Dryomov --- net/ceph/messenger.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 559c9f619c2..8d1653caffd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) IPPROTO_TCP, &sock); if (ret) return ret; - sock->sk->sk_allocation = GFP_NOFS; + sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC; #ifdef CONFIG_LOCKDEP lockdep_set_class(&sock->sk->sk_lock, &socket_class); @@ -509,6 +509,9 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } + + sk_set_memalloc(sock->sk); + con->sock = sock; return 0; } @@ -2769,8 +2772,11 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); + unsigned long pflags = current->flags; bool fault; + current->flags |= PF_MEMALLOC; + mutex_lock(&con->mutex); while (true) { int ret; @@ -2824,6 +2830,8 @@ static void con_work(struct work_struct *work) con_fault_finish(con); con->ops->put(con); + + tsk_restore_flags(current, pflags, PF_MEMALLOC); } /* -- cgit v1.2.3-70-g09d2 From a8d4205623ae965e36c68629db306ca0695a2771 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 22 Oct 2014 09:17:24 +0200 Subject: rbd: Fix error recovery in rbd_obj_read_sync() When we fail to allocate page vector in rbd_obj_read_sync() we just basically ignore the problem and continue which will result in an oops later. Fix the problem by returning proper error. CC: Yehuda Sadeh CC: Sage Weil CC: ceph-devel@vger.kernel.org CC: stable@vger.kernel.org Coverity-id: 1226882 Signed-off-by: Jan Kara Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index be8d44af6ae..27b71a0b72d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3533,7 +3533,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, page_count = (u32) calc_pages_for(offset, length); pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); if (IS_ERR(pages)) - ret = PTR_ERR(pages); + return PTR_ERR(pages); ret = -ENOMEM; obj_request = rbd_obj_request_create(object_name, offset, length, -- cgit v1.2.3-70-g09d2 From e9226d7c9f1d83278d78675d51acc07e1a78cb27 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 22 Oct 2014 18:15:37 +0400 Subject: libceph: eliminate unnecessary allocation in process_one_ticket() Commit c27a3e4d667f ("libceph: do not hard code max auth ticket len") while fixing a buffer overlow tried to keep the same as much of the surrounding code as possible and introduced an unnecessary kmalloc() in the unencrypted ticket path. It is likely to fail on huge tickets, so get rid of it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index de6662b14e1..7e38b729696 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -149,6 +149,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, struct ceph_crypto_key old_key; void *ticket_buf = NULL; void *tp, *tpend; + void **ptp; struct ceph_timespec new_validity; struct ceph_crypto_key new_session_key; struct ceph_buffer *new_ticket_blob; @@ -208,25 +209,19 @@ static int process_one_ticket(struct ceph_auth_client *ac, goto out; } tp = ticket_buf; - dlen = ceph_decode_32(&tp); + ptp = &tp; + tpend = *ptp + dlen; } else { /* unencrypted */ - ceph_decode_32_safe(p, end, dlen, bad); - ticket_buf = kmalloc(dlen, GFP_NOFS); - if (!ticket_buf) { - ret = -ENOMEM; - goto out; - } - tp = ticket_buf; - ceph_decode_need(p, end, dlen, bad); - ceph_decode_copy(p, ticket_buf, dlen); + ptp = p; + tpend = end; } - tpend = tp + dlen; + ceph_decode_32_safe(ptp, tpend, dlen, bad); dout(" ticket blob is %d bytes\n", dlen); - ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - blob_struct_v = ceph_decode_8(&tp); - new_secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad); + blob_struct_v = ceph_decode_8(ptp); + new_secret_id = ceph_decode_64(ptp); + ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend); if (ret) goto out; -- cgit v1.2.3-70-g09d2