From 5ecc0a0f8128b1876e8614638deaed49cc8b174c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 6 Oct 2009 11:31:11 -0700 Subject: ceph: CRUSH mapping algorithm CRUSH is a pseudorandom data distribution function designed to map inputs onto a dynamic hierarchy of devices, while minimizing the extent to which inputs are remapped when the devices are added or removed. It includes some features that are specifically useful for storage, most notably the ability to map each input onto a set of N devices that are separated across administrator-defined failure domains. CRUSH is used to distribute data across the cluster of Ceph storage nodes. More information about CRUSH can be found in this paper: http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf Signed-off-by: Sage Weil --- fs/ceph/crush/crush.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 fs/ceph/crush/crush.c (limited to 'fs/ceph/crush/crush.c') diff --git a/fs/ceph/crush/crush.c b/fs/ceph/crush/crush.c new file mode 100644 index 00000000000..13755cdc4fb --- /dev/null +++ b/fs/ceph/crush/crush.c @@ -0,0 +1,140 @@ + +#ifdef __KERNEL__ +# include +#else +# include +# include +# define kfree(x) do { if (x) free(x); } while (0) +# define BUG_ON(x) assert(!(x)) +#endif + +#include "crush.h" + +/** + * crush_get_bucket_item_weight - Get weight of an item in given bucket + * @b: bucket pointer + * @p: item index in bucket + */ +int crush_get_bucket_item_weight(struct crush_bucket *b, int p) +{ + if (p >= b->size) + return 0; + + switch (b->alg) { + case CRUSH_BUCKET_UNIFORM: + return ((struct crush_bucket_uniform *)b)->item_weight; + case CRUSH_BUCKET_LIST: + return ((struct crush_bucket_list *)b)->item_weights[p]; + case CRUSH_BUCKET_TREE: + if (p & 1) + return ((struct crush_bucket_tree *)b)->node_weights[p]; + return 0; + case CRUSH_BUCKET_STRAW: + return ((struct crush_bucket_straw *)b)->item_weights[p]; + } + return 0; +} + +/** + * crush_calc_parents - Calculate parent vectors for the given crush map. + * @map: crush_map pointer + */ +void crush_calc_parents(struct crush_map *map) +{ + int i, b, c; + + for (b = 0; b < map->max_buckets; b++) { + if (map->buckets[b] == NULL) + continue; + for (i = 0; i < map->buckets[b]->size; i++) { + c = map->buckets[b]->items[i]; + BUG_ON(c >= map->max_devices || + c < -map->max_buckets); + if (c >= 0) + map->device_parents[c] = map->buckets[b]->id; + else + map->bucket_parents[-1-c] = map->buckets[b]->id; + } + } +} + +void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) +{ + kfree(b->h.perm); + kfree(b->h.items); + kfree(b); +} + +void crush_destroy_bucket_list(struct crush_bucket_list *b) +{ + kfree(b->item_weights); + kfree(b->sum_weights); + kfree(b->h.perm); + kfree(b->h.items); + kfree(b); +} + +void crush_destroy_bucket_tree(struct crush_bucket_tree *b) +{ + kfree(b->node_weights); + kfree(b); +} + +void crush_destroy_bucket_straw(struct crush_bucket_straw *b) +{ + kfree(b->straws); + kfree(b->item_weights); + kfree(b->h.perm); + kfree(b->h.items); + kfree(b); +} + +void crush_destroy_bucket(struct crush_bucket *b) +{ + switch (b->alg) { + case CRUSH_BUCKET_UNIFORM: + crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b); + break; + case CRUSH_BUCKET_LIST: + crush_destroy_bucket_list((struct crush_bucket_list *)b); + break; + case CRUSH_BUCKET_TREE: + crush_destroy_bucket_tree((struct crush_bucket_tree *)b); + break; + case CRUSH_BUCKET_STRAW: + crush_destroy_bucket_straw((struct crush_bucket_straw *)b); + break; + } +} + +/** + * crush_destroy - Destroy a crush_map + * @map: crush_map pointer + */ +void crush_destroy(struct crush_map *map) +{ + int b; + + /* buckets */ + if (map->buckets) { + for (b = 0; b < map->max_buckets; b++) { + if (map->buckets[b] == NULL) + continue; + crush_destroy_bucket(map->buckets[b]); + } + kfree(map->buckets); + } + + /* rules */ + if (map->rules) { + for (b = 0; b < map->max_rules; b++) + kfree(map->rules[b]); + kfree(map->rules); + } + + kfree(map->bucket_parents); + kfree(map->device_parents); + kfree(map); +} + + -- cgit v1.2.3-70-g09d2 From c6cf726316abd613cfb7c325d950f3629f964ec6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 6 Nov 2009 16:39:26 -0800 Subject: ceph: make CRUSH hash functions non-inline These are way to big to be inline. I missed crush/* when doing the inline audit for akpm's review. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 2 +- fs/ceph/README | 1 + fs/ceph/crush/crush.c | 11 ++++++ fs/ceph/crush/crush.h | 11 +----- fs/ceph/crush/hash.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/crush/hash.h | 92 ++++----------------------------------------------- 6 files changed, 107 insertions(+), 96 deletions(-) create mode 100644 fs/ceph/crush/hash.c (limited to 'fs/ceph/crush/crush.c') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 7da6d69dba2..8bad70aac36 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -11,7 +11,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ messenger.o msgpool.o buffer.o \ mds_client.o mdsmap.o \ mon_client.o \ - osd_client.o osdmap.o crush/crush.o crush/mapper.o \ + osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ ceph_fs.o ceph_strings.o ceph_frag.o diff --git a/fs/ceph/README b/fs/ceph/README index 231a1df5eaf..660e00074d5 100644 --- a/fs/ceph/README +++ b/fs/ceph/README @@ -15,3 +15,4 @@ src/crush/crush.h fs/ceph/crush/crush.h src/crush/mapper.c fs/ceph/crush/mapper.c src/crush/mapper.h fs/ceph/crush/mapper.h src/crush/hash.h fs/ceph/crush/hash.h +src/crush/hash.c fs/ceph/crush/hash.c diff --git a/fs/ceph/crush/crush.c b/fs/ceph/crush/crush.c index 13755cdc4fb..fabd302e577 100644 --- a/fs/ceph/crush/crush.c +++ b/fs/ceph/crush/crush.c @@ -10,6 +10,17 @@ #include "crush.h" +const char *crush_bucket_alg_name(int alg) +{ + switch (alg) { + case CRUSH_BUCKET_UNIFORM: return "uniform"; + case CRUSH_BUCKET_LIST: return "list"; + case CRUSH_BUCKET_TREE: return "tree"; + case CRUSH_BUCKET_STRAW: return "straw"; + default: return "unknown"; + } +} + /** * crush_get_bucket_item_weight - Get weight of an item in given bucket * @b: bucket pointer diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index 9ac7e091126..92c6b3c3a57 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h @@ -97,16 +97,7 @@ enum { CRUSH_BUCKET_TREE = 3, CRUSH_BUCKET_STRAW = 4 }; -static inline const char *crush_bucket_alg_name(int alg) -{ - switch (alg) { - case CRUSH_BUCKET_UNIFORM: return "uniform"; - case CRUSH_BUCKET_LIST: return "list"; - case CRUSH_BUCKET_TREE: return "tree"; - case CRUSH_BUCKET_STRAW: return "straw"; - default: return "unknown"; - } -} +extern const char *crush_bucket_alg_name(int alg); struct crush_bucket { __s32 id; /* this'll be negative */ diff --git a/fs/ceph/crush/hash.c b/fs/ceph/crush/hash.c new file mode 100644 index 00000000000..b438c5d2781 --- /dev/null +++ b/fs/ceph/crush/hash.c @@ -0,0 +1,86 @@ + +#include + +/* + * Robert Jenkins' function for mixing 32-bit values + * http://burtleburtle.net/bob/hash/evahash.html + * a, b = random bits, c = input and output + */ +#define crush_hashmix(a, b, c) do { \ + a = a-b; a = a-c; a = a^(c>>13); \ + b = b-c; b = b-a; b = b^(a<<8); \ + c = c-a; c = c-b; c = c^(b>>13); \ + a = a-b; a = a-c; a = a^(c>>12); \ + b = b-c; b = b-a; b = b^(a<<16); \ + c = c-a; c = c-b; c = c^(b>>5); \ + a = a-b; a = a-c; a = a^(c>>3); \ + b = b-c; b = b-a; b = b^(a<<10); \ + c = c-a; c = c-b; c = c^(b>>15); \ + } while (0) + +#define crush_hash_seed 1315423911 + +__u32 crush_hash32(__u32 a) +{ + __u32 hash = crush_hash_seed ^ a; + __u32 b = a; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(b, x, hash); + crush_hashmix(y, a, hash); + return hash; +} + +__u32 crush_hash32_2(__u32 a, __u32 b) +{ + __u32 hash = crush_hash_seed ^ a ^ b; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(x, a, hash); + crush_hashmix(b, y, hash); + return hash; +} + +__u32 crush_hash32_3(__u32 a, __u32 b, __u32 c) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, x, hash); + crush_hashmix(y, a, hash); + crush_hashmix(b, x, hash); + crush_hashmix(y, c, hash); + return hash; +} + +__u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, __u32 d) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, d, hash); + crush_hashmix(a, x, hash); + crush_hashmix(y, b, hash); + crush_hashmix(c, x, hash); + crush_hashmix(y, d, hash); + return hash; +} + +__u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, __u32 d, __u32 e) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, d, hash); + crush_hashmix(e, x, hash); + crush_hashmix(y, a, hash); + crush_hashmix(b, x, hash); + crush_hashmix(y, c, hash); + crush_hashmix(d, x, hash); + crush_hashmix(y, e, hash); + return hash; +} diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index 42f3312783a..9ce89f85dc7 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h @@ -1,90 +1,12 @@ #ifndef _CRUSH_HASH_H #define _CRUSH_HASH_H -/* - * Robert Jenkins' function for mixing 32-bit values - * http://burtleburtle.net/bob/hash/evahash.html - * a, b = random bits, c = input and output - */ -#define crush_hashmix(a, b, c) do { \ - a = a-b; a = a-c; a = a^(c>>13); \ - b = b-c; b = b-a; b = b^(a<<8); \ - c = c-a; c = c-b; c = c^(b>>13); \ - a = a-b; a = a-c; a = a^(c>>12); \ - b = b-c; b = b-a; b = b^(a<<16); \ - c = c-a; c = c-b; c = c^(b>>5); \ - a = a-b; a = a-c; a = a^(c>>3); \ - b = b-c; b = b-a; b = b^(a<<10); \ - c = c-a; c = c-b; c = c^(b>>15); \ - } while (0) - -#define crush_hash_seed 1315423911 - -static inline __u32 crush_hash32(__u32 a) -{ - __u32 hash = crush_hash_seed ^ a; - __u32 b = a; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(b, x, hash); - crush_hashmix(y, a, hash); - return hash; -} - -static inline __u32 crush_hash32_2(__u32 a, __u32 b) -{ - __u32 hash = crush_hash_seed ^ a ^ b; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(x, a, hash); - crush_hashmix(b, y, hash); - return hash; -} - -static inline __u32 crush_hash32_3(__u32 a, __u32 b, __u32 c) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, x, hash); - crush_hashmix(y, a, hash); - crush_hashmix(b, x, hash); - crush_hashmix(y, c, hash); - return hash; -} - -static inline __u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, - __u32 d) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, d, hash); - crush_hashmix(a, x, hash); - crush_hashmix(y, b, hash); - crush_hashmix(c, x, hash); - crush_hashmix(y, d, hash); - return hash; -} - -static inline __u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, - __u32 d, __u32 e) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, d, hash); - crush_hashmix(e, x, hash); - crush_hashmix(y, a, hash); - crush_hashmix(b, x, hash); - crush_hashmix(y, c, hash); - crush_hashmix(d, x, hash); - crush_hashmix(y, e, hash); - return hash; -} +extern __u32 crush_hash32(__u32 a); +extern __u32 crush_hash32_2(__u32 a, __u32 b); +extern __u32 crush_hash32_3(__u32 a, __u32 b, __u32 c); +extern __u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, + __u32 d); +extern __u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, + __u32 d, __u32 e); #endif -- cgit v1.2.3-70-g09d2