summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-core.c14
-rw-r--r--block/blk-merge.c6
-rw-r--r--block/bsg.c2
-rw-r--r--block/cfq-iosched.c178
-rw-r--r--block/cmd-filter.c233
-rw-r--r--block/elevator.c8
-rw-r--r--block/scsi_ioctl.c44
8 files changed, 155 insertions, 332 deletions
diff --git a/block/Makefile b/block/Makefile
index e9fa4dd690f..6c54ed0ff75 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
+ ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b06cf5c2a82..4b45435c6ea 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -595,8 +595,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
q->sg_reserved_size = INT_MAX;
- blk_set_cmd_filter_defaults(&q->cmd_filter);
-
/*
* all done
*/
@@ -1172,6 +1170,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const int unplug = bio_unplug(bio);
int rw_flags;
+ if (bio_barrier(bio) && bio_has_data(bio) &&
+ (q->next_ordered == QUEUE_ORDERED_NONE)) {
+ bio_endio(bio, -EOPNOTSUPP);
+ return 0;
+ }
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1472,11 +1475,6 @@ static inline void __generic_make_request(struct bio *bio)
err = -EOPNOTSUPP;
goto end_io;
}
- if (bio_barrier(bio) && bio_has_data(bio) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
ret = q->make_request_fn(q, bio);
} while (ret);
@@ -2365,7 +2363,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
__bio_clone(bio, bio_src);
if (bio_integrity(bio_src) &&
- bio_integrity_clone(bio, bio_src, gfp_mask))
+ bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data))
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 39ce64432ba..e1999679a4d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -350,6 +350,12 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (blk_integrity_rq(req) != blk_integrity_rq(next))
return 0;
+ /* don't merge requests of different failfast settings */
+ if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
+ blk_failfast_transport(req) != blk_failfast_transport(next) ||
+ blk_failfast_driver(req) != blk_failfast_driver(next))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/block/bsg.c b/block/bsg.c
index e7d47525424..5f184bb3ff9 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
+ if (blk_verify_command(rq->cmd, has_write_perm))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18eaa6..fd7080ed793 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+ /* reference count */
+ atomic_t ref;
+ /* various state flags, see below */
+ unsigned int flags;
+ /* parent cfq_data */
+ struct cfq_data *cfqd;
+ /* service_tree member */
+ struct rb_node rb_node;
+ /* service_tree key */
+ unsigned long rb_key;
+ /* prio tree member */
+ struct rb_node p_node;
+ /* prio tree root we belong to, if any */
+ struct rb_root *p_root;
+ /* sorted list of pending requests */
+ struct rb_root sort_list;
+ /* if fifo isn't expired, next request to serve */
+ struct request *next_rq;
+ /* requests queued in sort_list */
+ int queued[2];
+ /* currently allocated requests */
+ int allocated[2];
+ /* fifo list of requests in sort_list */
+ struct list_head fifo;
+
+ unsigned long slice_end;
+ long slice_resid;
+ unsigned int slice_dispatch;
+
+ /* pending metadata requests */
+ int meta_pending;
+ /* number of requests that are on the dispatch list or inside driver */
+ int dispatched;
+
+ /* io prio of this group */
+ unsigned short ioprio, org_ioprio;
+ unsigned short ioprio_class, org_ioprio_class;
+
+ pid_t pid;
+};
+
+/*
* Per block device queue structure
*/
struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
unsigned int cfq_slice_idle;
struct list_head cic_list;
-};
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
- /* reference count */
- atomic_t ref;
- /* various state flags, see below */
- unsigned int flags;
- /* parent cfq_data */
- struct cfq_data *cfqd;
- /* service_tree member */
- struct rb_node rb_node;
- /* service_tree key */
- unsigned long rb_key;
- /* prio tree member */
- struct rb_node p_node;
- /* prio tree root we belong to, if any */
- struct rb_root *p_root;
- /* sorted list of pending requests */
- struct rb_root sort_list;
- /* if fifo isn't expired, next request to serve */
- struct request *next_rq;
- /* requests queued in sort_list */
- int queued[2];
- /* currently allocated requests */
- int allocated[2];
- /* fifo list of requests in sort_list */
- struct list_head fifo;
-
- unsigned long slice_end;
- long slice_resid;
- unsigned int slice_dispatch;
-
- /* pending metadata requests */
- int meta_pending;
- /* number of requests that are on the dispatch list or inside driver */
- int dispatched;
-
- /* io prio of this group */
- unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
-
- pid_t pid;
+ /*
+ * Fallback dummy cfqq for extreme OOM conditions
+ */
+ struct cfq_queue oom_cfqq;
};
enum cfqq_state_flags {
@@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
ioc->ioprio_changed = 0;
}
+static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ pid_t pid, int is_sync)
+{
+ RB_CLEAR_NODE(&cfqq->rb_node);
+ RB_CLEAR_NODE(&cfqq->p_node);
+ INIT_LIST_HEAD(&cfqq->fifo);
+
+ atomic_set(&cfqq->ref, 0);
+ cfqq->cfqd = cfqd;
+
+ cfq_mark_cfqq_prio_changed(cfqq);
+
+ if (is_sync) {
+ if (!cfq_class_idle(cfqq))
+ cfq_mark_cfqq_idle_window(cfqq);
+ cfq_mark_cfqq_sync(cfqq);
+ }
+ cfqq->pid = pid;
+}
+
static struct cfq_queue *
cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
struct io_context *ioc, gfp_t gfp_mask)
@@ -1653,56 +1678,40 @@ retry:
/* cic always exists here */
cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ /*
+ * Always try a new alloc if we fell back to the OOM cfqq
+ * originally, since it should just be a temporary situation.
+ */
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+ cfqq = NULL;
if (new_cfqq) {
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
- /*
- * Inform the allocator of the fact that we will
- * just repeat this allocation if it fails, to allow
- * the allocator to do whatever it needs to attempt to
- * free memory.
- */
spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool,
- gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+ gfp_mask | __GFP_ZERO,
cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock);
- goto retry;
+ if (new_cfqq)
+ goto retry;
} else {
cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO,
cfqd->queue->node);
- if (!cfqq)
- goto out;
}
- RB_CLEAR_NODE(&cfqq->rb_node);
- RB_CLEAR_NODE(&cfqq->p_node);
- INIT_LIST_HEAD(&cfqq->fifo);
-
- atomic_set(&cfqq->ref, 0);
- cfqq->cfqd = cfqd;
-
- cfq_mark_cfqq_prio_changed(cfqq);
-
- cfq_init_prio_data(cfqq, ioc);
-
- if (is_sync) {
- if (!cfq_class_idle(cfqq))
- cfq_mark_cfqq_idle_window(cfqq);
- cfq_mark_cfqq_sync(cfqq);
- }
- cfqq->pid = current->pid;
- cfq_log_cfqq(cfqd, cfqq, "alloced");
+ if (cfqq) {
+ cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+ cfq_init_prio_data(cfqq, ioc);
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
+ } else
+ cfqq = &cfqd->oom_cfqq;
}
if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);
-out:
- WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq;
}
@@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
cfqq = *async_cfqq;
}
- if (!cfqq) {
+ if (!cfqq)
cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
- if (!cfqq)
- return NULL;
- }
/*
* pin the queue now that it's allocated, scheduler exit will prune it
@@ -2305,12 +2311,8 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
goto queue_fail;
cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
-
- if (!cfqq)
- goto queue_fail;
-
cic_set_cfqq(cic, cfqq, is_sync);
}
@@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT;
+ /*
+ * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+ * Grab a permanent reference to it, so that the normal code flow
+ * will not attempt to free it.
+ */
+ cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+ atomic_inc(&cfqd->oom_cfqq.ref);
+
INIT_LIST_HEAD(&cfqd->cic_list);
cfqd->queue = q;
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
deleted file mode 100644
index 572bbc2f900..00000000000
--- a/block/cmd-filter.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright 2004 Peter M. Jones <pjones@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- *
- */
-
-#include <linux/list.h>
-#include <linux/genhd.h>
-#include <linux/spinlock.h>
-#include <linux/capability.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-
-#include <scsi/scsi.h>
-#include <linux/cdrom.h>
-
-int blk_verify_command(struct blk_cmd_filter *filter,
- unsigned char *cmd, fmode_t has_write_perm)
-{
- /* root can do any command. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- /* if there's no filter set, assume we're filtering everything out */
- if (!filter)
- return -EPERM;
-
- /* Anybody who can open the device can do a read-safe command */
- if (test_bit(cmd[0], filter->read_ok))
- return 0;
-
- /* Write-safe commands require a writable open */
- if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
- return 0;
-
- return -EPERM;
-}
-EXPORT_SYMBOL(blk_verify_command);
-
-#if 0
-/* and now, the sysfs stuff */
-static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
- int rw)
-{
- char *npage = page;
- unsigned long *okbits;
- int i;
-
- if (rw == READ)
- okbits = filter->read_ok;
- else
- okbits = filter->write_ok;
-
- for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
- if (test_bit(i, okbits)) {
- npage += sprintf(npage, "0x%02x", i);
- if (i < BLK_SCSI_MAX_CMDS - 1)
- sprintf(npage++, " ");
- }
- }
-
- if (npage != page)
- npage += sprintf(npage, "\n");
-
- return npage - page;
-}
-
-static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
-{
- return rcf_cmds_show(filter, page, READ);
-}
-
-static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
- char *page)
-{
- return rcf_cmds_show(filter, page, WRITE);
-}
-
-static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count, int rw)
-{
- unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
- int cmd, set;
- char *p, *status;
-
- if (rw == READ) {
- memcpy(&okbits, filter->read_ok, sizeof(okbits));
- target_okbits = filter->read_ok;
- } else {
- memcpy(&okbits, filter->write_ok, sizeof(okbits));
- target_okbits = filter->write_ok;
- }
-
- while ((p = strsep((char **)&page, " ")) != NULL) {
- set = 1;
-
- if (p[0] == '+') {
- p++;
- } else if (p[0] == '-') {
- set = 0;
- p++;
- }
-
- cmd = simple_strtol(p, &status, 16);
-
- /* either of these cases means invalid input, so do nothing. */
- if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS)
- return -EINVAL;
-
- if (set)
- __set_bit(cmd, okbits);
- else
- __clear_bit(cmd, okbits);
- }
-
- memcpy(target_okbits, okbits, sizeof(okbits));
- return count;
-}
-
-static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count)
-{
- return rcf_cmds_store(filter, page, count, READ);
-}
-
-static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count)
-{
- return rcf_cmds_store(filter, page, count, WRITE);
-}
-
-struct rcf_sysfs_entry {
- struct attribute attr;
- ssize_t (*show)(struct blk_cmd_filter *, char *);
- ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
-};
-
-static struct rcf_sysfs_entry rcf_readcmds_entry = {
- .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
- .show = rcf_readcmds_show,
- .store = rcf_readcmds_store,
-};
-
-static struct rcf_sysfs_entry rcf_writecmds_entry = {
- .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
- .show = rcf_writecmds_show,
- .store = rcf_writecmds_store,
-};
-
-static struct attribute *default_attrs[] = {
- &rcf_readcmds_entry.attr,
- &rcf_writecmds_entry.attr,
- NULL,
-};
-
-#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
-
-static ssize_t
-rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
-{
- struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_cmd_filter *filter;
-
- filter = container_of(kobj, struct blk_cmd_filter, kobj);
- if (entry->show)
- return entry->show(filter, page);
-
- return 0;
-}
-
-static ssize_t
-rcf_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *page, size_t length)
-{
- struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_cmd_filter *filter;
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- if (!entry->store)
- return -EINVAL;
-
- filter = container_of(kobj, struct blk_cmd_filter, kobj);
- return entry->store(filter, page, length);
-}
-
-static struct sysfs_ops rcf_sysfs_ops = {
- .show = rcf_attr_show,
- .store = rcf_attr_store,
-};
-
-static struct kobj_type rcf_ktype = {
- .sysfs_ops = &rcf_sysfs_ops,
- .default_attrs = default_attrs,
-};
-
-int blk_register_filter(struct gendisk *disk)
-{
- int ret;
- struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
- ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
- &disk_to_dev(disk)->kobj,
- "%s", "cmd_filter");
- if (ret < 0)
- return ret;
-
- return 0;
-}
-EXPORT_SYMBOL(blk_register_filter);
-
-void blk_unregister_filter(struct gendisk *disk)
-{
- struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
- kobject_put(&filter->kobj);
-}
-EXPORT_SYMBOL(blk_unregister_filter);
-#endif
diff --git a/block/elevator.c b/block/elevator.c
index ca861927ba4..6f2375339a9 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -100,6 +100,14 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return 0;
+ /*
+ * Don't merge if failfast settings don't match
+ */
+ if (bio_failfast_dev(bio) != blk_failfast_dev(rq) ||
+ bio_failfast_transport(bio) != blk_failfast_transport(rq) ||
+ bio_failfast_driver(bio) != blk_failfast_driver(rq))
+ return 0;
+
if (!elv_iosched_allow_merge(rq, bio))
return 0;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 5f8e798ede4..e5b10017a50 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -32,6 +32,11 @@
#include <scsi/scsi_ioctl.h>
#include <scsi/scsi_cmnd.h>
+struct blk_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+} blk_default_cmd_filter;
+
/* Command group 3 is reserved and should never be used. */
const unsigned char scsi_command_size_tbl[8] =
{
@@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
-void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
+static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
{
/* Basic read-only commands */
__set_bit(TEST_UNIT_READY, filter->read_ok);
@@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
}
-EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
+
+int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
+{
+ struct blk_cmd_filter *filter = &blk_default_cmd_filter;
+
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_verify_command);
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_hdr *hdr, fmode_t mode)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE))
+ if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
return -EPERM;
/*
@@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE);
+ err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
if (err)
goto error;
@@ -645,5 +673,11 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
blk_put_queue(q);
return err;
}
-
EXPORT_SYMBOL(scsi_cmd_ioctl);
+
+int __init blk_scsi_ioctl_init(void)
+{
+ blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
+ return 0;
+}
+fs_initcall(blk_scsi_ioctl_init);