summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
committerIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
commitbcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch)
treee420679a5db6ea4e1694eef57f9abb6acac8d4d3 /drivers/md/dm-raid.c
parent26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff)
parent000078bc3ee69efb1124b8478c7527389a826074 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: * Fix include order for bison/flex-generated C files, from Ben Hutchings * Build fixes and documentation corrections from David Ahern * Group parsing support, from Jiri Olsa * UI/gtk refactorings and improvements from Namhyung Kim * NULL deref fix for perf script, from Namhyung Kim * Assorted cleanups from Robert Richter * Let 'make O=' handle relative paths, from Steven Rostedt * perf script python fixes, from Feng Tang. * Improve 'perf lock' error message when the needed tracepoints are not present, from David Ahern. * Initial bash completion support, from Frederic Weisbecker * Allow building without libelf, from Namhyung Kim. * Support DWARF CFI based unwind to have callchains when %bp based unwinding is not possible, from Jiri Olsa. * Symbol resolution fixes: while fixing support for PPC64 files with an .opt ELF section was the end goal, several fixes for code that handles all architectures, and cleanups, are included, from Cody Schafer. * Add a description for the JIT interface, from Andi Kleen. * Assorted fixes for Documentation and build in 32 bit, from Robert Richter * Add support for non-tracepoint events in perf script python, from Feng Tang * Cache the libtraceevent event_format associated with each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events. [ This is to reduce the contact surface with libtraceevent and make clear what it is that the perf tools need from that lib: so far parsing the common and per event fields. ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--drivers/md/dm-raid.c147
1 files changed, 123 insertions, 24 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 017c34d78d6..982e3e390c4 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@
#include "md.h"
#include "raid1.h"
#include "raid5.h"
+#include "raid10.h"
#include "bitmap.h"
#include <linux/device-mapper.h>
@@ -52,7 +53,10 @@ struct raid_dev {
#define DMPF_MAX_RECOVERY_RATE 0x20
#define DMPF_MAX_WRITE_BEHIND 0x40
#define DMPF_STRIPE_CACHE 0x80
-#define DMPF_REGION_SIZE 0X100
+#define DMPF_REGION_SIZE 0x100
+#define DMPF_RAID10_COPIES 0x200
+#define DMPF_RAID10_FORMAT 0x400
+
struct raid_set {
struct dm_target *ti;
@@ -76,6 +80,7 @@ static struct raid_type {
const unsigned algorithm; /* RAID algorithm. */
} raid_types[] = {
{"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
+ {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */},
{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
{"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
{"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -86,6 +91,17 @@ static struct raid_type {
{"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
};
+static unsigned raid10_md_layout_to_copies(int layout) /* Extract the copy count from an md RAID10 layout value. */
+{
+ return layout & 0xFF; /* low byte only; raid10_format_to_md_layout() stores 'copies' there */
+}
+
+static int raid10_format_to_md_layout(char *format, unsigned copies) /* Build the md layout value for a RAID10 set. */
+{
+ /* 1 "far" copy, and 'copies' "near" copies; 'format' is not consulted here (parse_raid_params only accepts "near") */
+ return (1 << 8) | (copies & 0xFF); /* bit 8 set for the single far copy, low byte carries 'copies' (truncated to 8 bits) */
+}
+
static struct raid_type *get_raid_type(char *name)
{
int i;
@@ -101,20 +117,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
{
unsigned i;
struct raid_set *rs;
- sector_t sectors_per_dev;
if (raid_devs <= raid_type->parity_devs) {
ti->error = "Insufficient number of devices";
return ERR_PTR(-EINVAL);
}
- sectors_per_dev = ti->len;
- if ((raid_type->level > 1) &&
- sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
- ti->error = "Target length not divisible by number of data devices";
- return ERR_PTR(-EINVAL);
- }
-
rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
if (!rs) {
ti->error = "Cannot allocate raid context";
@@ -128,7 +136,6 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
rs->md.raid_disks = raid_devs;
rs->md.level = raid_type->level;
rs->md.new_level = rs->md.level;
- rs->md.dev_sectors = sectors_per_dev;
rs->md.layout = raid_type->algorithm;
rs->md.new_layout = rs->md.layout;
rs->md.delta_disks = 0;
@@ -143,6 +150,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
* rs->md.external
* rs->md.chunk_sectors
* rs->md.new_chunk_sectors
+ * rs->md.dev_sectors
*/
return rs;
@@ -347,12 +355,20 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
* [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs
* [region_size <sectors>] Defines granularity of bitmap
+ *
+ * RAID10-only options:
+ * [raid10_copies <# copies>] Number of copies. (Default: 2)
+ * [raid10_format <near>] Layout algorithm. (Default: near)
*/
static int parse_raid_params(struct raid_set *rs, char **argv,
unsigned num_raid_params)
{
+ char *raid10_format = "near";
+ unsigned raid10_copies = 2;
unsigned i, rebuild_cnt = 0;
unsigned long value, region_size = 0;
+ sector_t sectors_per_dev = rs->ti->len;
+ sector_t max_io_len;
char *key;
/*
@@ -422,20 +438,53 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
}
key = argv[i++];
+
+ /* Parameters that take a string value are checked here. */
+ if (!strcasecmp(key, "raid10_format")) {
+ if (rs->raid_type->level != 10) {
+ rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
+ return -EINVAL;
+ }
+ if (strcmp("near", argv[i])) {
+ rs->ti->error = "Invalid 'raid10_format' value given";
+ return -EINVAL;
+ }
+ raid10_format = argv[i];
+ rs->print_flags |= DMPF_RAID10_FORMAT;
+ continue;
+ }
+
if (strict_strtoul(argv[i], 10, &value) < 0) {
rs->ti->error = "Bad numerical argument given in raid params";
return -EINVAL;
}
+ /* Parameters that take a numeric value are checked here */
if (!strcasecmp(key, "rebuild")) {
rebuild_cnt++;
- if (((rs->raid_type->level != 1) &&
- (rebuild_cnt > rs->raid_type->parity_devs)) ||
- ((rs->raid_type->level == 1) &&
- (rebuild_cnt > (rs->md.raid_disks - 1)))) {
- rs->ti->error = "Too many rebuild devices specified for given RAID type";
+
+ switch (rs->raid_type->level) {
+ case 1:
+ if (rebuild_cnt >= rs->md.raid_disks) {
+ rs->ti->error = "Too many rebuild devices specified";
+ return -EINVAL;
+ }
+ break;
+ case 4:
+ case 5:
+ case 6:
+ if (rebuild_cnt > rs->raid_type->parity_devs) {
+ rs->ti->error = "Too many rebuild devices specified for given RAID type";
+ return -EINVAL;
+ }
+ break;
+ case 10:
+ default:
+ DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
+ rs->ti->error = "Rebuild not supported for this RAID type";
return -EINVAL;
}
+
if (value > rs->md.raid_disks) {
rs->ti->error = "Invalid rebuild index given";
return -EINVAL;
@@ -486,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
*/
value /= 2;
- if (rs->raid_type->level < 5) {
+ if ((rs->raid_type->level != 5) &&
+ (rs->raid_type->level != 6)) {
rs->ti->error = "Inappropriate argument: stripe_cache";
return -EINVAL;
}
@@ -511,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
} else if (!strcasecmp(key, "region_size")) {
rs->print_flags |= DMPF_REGION_SIZE;
region_size = value;
+ } else if (!strcasecmp(key, "raid10_copies") &&
+ (rs->raid_type->level == 10)) {
+ if ((value < 2) || (value > 0xFF)) {
+ rs->ti->error = "Bad value for 'raid10_copies'";
+ return -EINVAL;
+ }
+ rs->print_flags |= DMPF_RAID10_COPIES;
+ raid10_copies = value;
} else {
DMERR("Unable to parse RAID parameter: %s", key);
rs->ti->error = "Unable to parse RAID parameters";
@@ -522,14 +580,33 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
return -EINVAL;
if (rs->md.chunk_sectors)
- rs->ti->split_io = rs->md.chunk_sectors;
+ max_io_len = rs->md.chunk_sectors;
else
- rs->ti->split_io = region_size;
+ max_io_len = region_size;
- if (rs->md.chunk_sectors)
- rs->ti->split_io = rs->md.chunk_sectors;
- else
- rs->ti->split_io = region_size;
+ if (dm_set_target_max_io_len(rs->ti, max_io_len))
+ return -EINVAL;
+
+ if (rs->raid_type->level == 10) {
+ if (raid10_copies > rs->md.raid_disks) {
+ rs->ti->error = "Not enough devices to satisfy specification";
+ return -EINVAL;
+ }
+
+ /* (Len * #mirrors) / #devices */
+ sectors_per_dev = rs->ti->len * raid10_copies;
+ sector_div(sectors_per_dev, rs->md.raid_disks);
+
+ rs->md.layout = raid10_format_to_md_layout(raid10_format,
+ raid10_copies);
+ rs->md.new_layout = rs->md.layout;
+ } else if ((rs->raid_type->level > 1) &&
+ sector_div(sectors_per_dev,
+ (rs->md.raid_disks - rs->raid_type->parity_devs))) {
+ rs->ti->error = "Target length not divisible by number of data devices";
+ return -EINVAL;
+ }
+ rs->md.dev_sectors = sectors_per_dev;
/* Assume there are no metadata devices until the drives are parsed */
rs->md.persistent = 0;
@@ -552,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
if (rs->raid_type->level == 1)
return md_raid1_congested(&rs->md, bits);
+ if (rs->raid_type->level == 10)
+ return md_raid10_congested(&rs->md, bits);
+
return md_raid5_congested(&rs->md, bits);
}
@@ -870,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
case 6:
redundancy = rs->raid_type->parity_devs;
break;
+ case 10:
+ redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
+ break;
default:
ti->error = "Unknown RAID type";
return -EINVAL;
@@ -1035,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
+ if (ti->len != rs->md.array_sectors) {
+ ti->error = "Array size does not match requested target length";
+ ret = -EINVAL;
+ goto size_mismatch;
+ }
rs->callbacks.congested_fn = raid_is_congested;
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
mddev_suspend(&rs->md);
return 0;
+size_mismatch:
+ md_stop(&rs->md);
bad:
context_free(rs);
@@ -1067,7 +1157,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c
}
static int raid_status(struct dm_target *ti, status_type_t type,
- char *result, unsigned maxlen)
+ unsigned status_flags, char *result, unsigned maxlen)
{
struct raid_set *rs = ti->private;
unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -1189,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type,
DMEMIT(" region_size %lu",
rs->md.bitmap_info.chunksize >> 9);
+ if (rs->print_flags & DMPF_RAID10_COPIES)
+ DMEMIT(" raid10_copies %u",
+ raid10_md_layout_to_copies(rs->md.layout));
+
+ if (rs->print_flags & DMPF_RAID10_FORMAT)
+ DMEMIT(" raid10_format near");
+
DMEMIT(" %d", rs->md.raid_disks);
for (i = 0; i < rs->md.raid_disks; i++) {
if (rs->dev[i].meta_dev)
@@ -1263,7 +1360,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
@@ -1290,6 +1387,8 @@ module_init(dm_raid_init);
module_exit(dm_raid_exit);
MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
+MODULE_ALIAS("dm-raid1");
+MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");