summaryrefslogtreecommitdiffstats
path: root/include/linux/dst.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/dst.h')
-rw-r--r--include/linux/dst.h587
1 files changed, 587 insertions, 0 deletions
diff --git a/include/linux/dst.h b/include/linux/dst.h
new file mode 100644
index 00000000000..e26fed84b1a
--- /dev/null
+++ b/include/linux/dst.h
@@ -0,0 +1,587 @@
+/*
+ * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DST_H
+#define __DST_H
+
+#include <linux/types.h>
+#include <linux/connector.h>
+
+#define DST_NAMELEN 32
+#define DST_NAME "dst"
+
+enum {
+ /* Remove node with given id from storage */
+ DST_DEL_NODE = 0,
+ /* Add remote node with given id to the storage */
+ DST_ADD_REMOTE,
+ /* Add local node with given id to the storage to be exported and used by remote peers */
+ DST_ADD_EXPORT,
+ /* Crypto initialization command (hash/cipher used to protect the connection) */
+ DST_CRYPTO,
+ /* Security attributes for given connection (permissions for example) */
+ DST_SECURITY,
+ /* Register given node in the block layer subsystem */
+ DST_START,
+ DST_CMD_MAX
+};
+
+struct dst_ctl
+{
+ /* Storage name */
+ char name[DST_NAMELEN];
+ /* Command flags */
+ __u32 flags;
+ /* Command itself (see above) */
+ __u32 cmd;
+ /* Maximum number of pages per single request in this device */
+ __u32 max_pages;
+ /* Stale/error transaction scanning timeout in milliseconds */
+ __u32 trans_scan_timeout;
+ /* Maximum number of retry sends before completing transaction as broken */
+ __u32 trans_max_retries;
+ /* Storage size */
+ __u64 size;
+};
+
+/* Reply command carries completion status */
+struct dst_ctl_ack
+{
+ struct cn_msg msg;
+ int error;
+ int unused[3];
+};
+
+/*
+ * Unfortunaltely socket address structure is not exported to userspace
+ * and is redefined there.
+ */
+#define SADDR_MAX_DATA 128
+
+struct saddr {
+ /* address family, AF_xxx */
+ unsigned short sa_family;
+ /* 14 bytes of protocol address */
+ char sa_data[SADDR_MAX_DATA];
+ /* Number of bytes used in sa_data */
+ unsigned short sa_data_len;
+};
+
+/* Address structure */
+struct dst_network_ctl
+{
+ /* Socket type: datagram, stream...*/
+ unsigned int type;
+ /* Let me guess, is it a Jupiter diameter? */
+ unsigned int proto;
+ /* Peer's address */
+ struct saddr addr;
+};
+
+struct dst_crypto_ctl
+{
+ /* Cipher and hash names */
+ char cipher_algo[DST_NAMELEN];
+ char hash_algo[DST_NAMELEN];
+
+ /* Key sizes. Can be zero for digest for example */
+ unsigned int cipher_keysize, hash_keysize;
+ /* Alignment. Calculated by the DST itself. */
+ unsigned int crypto_attached_size;
+ /* Number of threads to perform crypto operations */
+ int thread_num;
+};
+
+/* Export security attributes have this bits checked in when client connects */
+#define DST_PERM_READ (1<<0)
+#define DST_PERM_WRITE (1<<1)
+
+/*
+ * Right now it is simple model, where each remote address
+ * is assigned to set of permissions it is allowed to perform.
+ * In real world block device does not know anything but
+ * reading and writing, so it should be more than enough.
+ */
+struct dst_secure_user
+{
+ unsigned int permissions;
+ struct saddr addr;
+};
+
+/*
+ * Export control command: device to export and network address to accept
+ * clients to work with given device
+ */
+struct dst_export_ctl
+{
+ char device[DST_NAMELEN];
+ struct dst_network_ctl ctl;
+};
+
+enum {
+ DST_CFG = 1, /* Request remote configuration */
+ DST_IO, /* IO command */
+ DST_IO_RESPONSE, /* IO response */
+ DST_PING, /* Keepalive message */
+ DST_NCMD_MAX,
+};
+
+struct dst_cmd
+{
+ /* Network command itself, see above */
+ __u32 cmd;
+ /*
+ * Size of the attached data
+ * (in most cases, for READ command it means how many bytes were requested)
+ */
+ __u32 size;
+ /* Crypto size: number of attached bytes with digest/hmac */
+ __u32 csize;
+ /* Here we can carry secret data */
+ __u32 reserved;
+ /* Read/write bits, see how they are encoded in bio structure */
+ __u64 rw;
+ /* BIO flags */
+ __u64 flags;
+ /* Unique command id (like transaction ID) */
+ __u64 id;
+ /* Sector to start IO from */
+ __u64 sector;
+ /* Hash data is placed after this header */
+ __u8 hash[0];
+};
+
+/*
+ * Convert command to/from network byte order.
+ * We do not use hton*() functions, since there is
+ * no 64-bit implementation.
+ */
+static inline void dst_convert_cmd(struct dst_cmd *c)
+{
+ c->cmd = __cpu_to_be32(c->cmd);
+ c->csize = __cpu_to_be32(c->csize);
+ c->size = __cpu_to_be32(c->size);
+ c->sector = __cpu_to_be64(c->sector);
+ c->id = __cpu_to_be64(c->id);
+ c->flags = __cpu_to_be64(c->flags);
+ c->rw = __cpu_to_be64(c->rw);
+}
+
+/* Transaction id */
+typedef __u64 dst_gen_t;
+
+#ifdef __KERNEL__
+
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/device.h>
+#include <linux/mempool.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/rbtree.h>
+
+#ifdef CONFIG_DST_DEBUG
+#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
+#else
+static inline void __attribute__ ((format (printf, 1, 2)))
+ dprintk(const char *fmt, ...) {}
+#endif
+
+struct dst_node;
+
+struct dst_trans
+{
+ /* DST node we are working with */
+ struct dst_node *n;
+
+ /* Entry inside transaction tree */
+ struct rb_node trans_entry;
+
+ /* Merlin kills this transaction when this memory cell equals zero */
+ atomic_t refcnt;
+
+ /* How this transaction should be processed by crypto engine */
+ short enc;
+ /* How many times this transaction was resent */
+ short retries;
+ /* Completion status */
+ int error;
+
+ /* When did we send it to the remote peer */
+ long send_time;
+
+ /* My name is...
+ * Well, computers does not speak, they have unique id instead */
+ dst_gen_t gen;
+
+ /* Block IO we are working with */
+ struct bio *bio;
+
+ /* Network command for above block IO request */
+ struct dst_cmd cmd;
+};
+
+struct dst_crypto_engine
+{
+ /* What should we do with all block requests */
+ struct crypto_hash *hash;
+ struct crypto_ablkcipher *cipher;
+
+ /* Pool of pages used to encrypt data into before sending */
+ int page_num;
+ struct page **pages;
+
+ /* What to do with current request */
+ int enc;
+ /* Who we are and where do we go */
+ struct scatterlist *src, *dst;
+
+ /* Maximum timeout waiting for encryption to be completed */
+ long timeout;
+ /* IV is a 64-bit sequential counter */
+ u64 iv;
+
+ /* Secret data */
+ void *private;
+
+ /* Cached temporary data lives here */
+ int size;
+ void *data;
+};
+
+struct dst_state
+{
+ /* The main state protection */
+ struct mutex state_lock;
+
+ /* Polling machinery for sockets */
+ wait_queue_t wait;
+ wait_queue_head_t *whead;
+ /* Most of events are being waited here */
+ wait_queue_head_t thread_wait;
+
+ /* Who owns this? */
+ struct dst_node *node;
+
+ /* Network address for this state */
+ struct dst_network_ctl ctl;
+
+ /* Permissions to work with: read-only or rw connection */
+ u32 permissions;
+
+ /* Called when we need to clean private data */
+ void (* cleanup)(struct dst_state *st);
+
+ /* Used by the server: BIO completion queues BIOs here */
+ struct list_head request_list;
+ spinlock_t request_lock;
+
+ /* Guess what? No, it is not number of planets */
+ atomic_t refcnt;
+
+ /* This flags is set when connection should be dropped */
+ int need_exit;
+
+ /*
+ * Socket to work with. Second pointer is used for
+ * lockless check if socket was changed before performing
+ * next action (like working with cached polling result)
+ */
+ struct socket *socket, *read_socket;
+
+ /* Cached preallocated data */
+ void *data;
+ unsigned int size;
+
+ /* Currently processed command */
+ struct dst_cmd cmd;
+};
+
+struct dst_info
+{
+ /* Device size */
+ u64 size;
+
+ /* Local device name for export devices */
+ char local[DST_NAMELEN];
+
+ /* Network setup */
+ struct dst_network_ctl net;
+
+ /* Sysfs bits use this */
+ struct device device;
+};
+
+struct dst_node
+{
+ struct list_head node_entry;
+
+ /* Hi, my name is stored here */
+ char name[DST_NAMELEN];
+ /* My cache name is stored here */
+ char cache_name[DST_NAMELEN];
+
+ /* Block device attached to given node.
+ * Only valid for exporting nodes */
+ struct block_device *bdev;
+ /* Network state machine for given peer */
+ struct dst_state *state;
+
+ /* Block IO machinery */
+ struct request_queue *queue;
+ struct gendisk *disk;
+
+ /* Number of threads in processing pool */
+ int thread_num;
+ /* Maximum number of pages in single IO */
+ int max_pages;
+
+ /* I'm that big in bytes */
+ loff_t size;
+
+ /* Exported to userspace node information */
+ struct dst_info *info;
+
+ /*
+ * Security attribute list.
+ * Used only by exporting node currently.
+ */
+ struct list_head security_list;
+ struct mutex security_lock;
+
+ /*
+ * When this unerflows below zero, university collapses.
+ * But this will not happen, since node will be freed,
+ * when reference counter reaches zero.
+ */
+ atomic_t refcnt;
+
+ /* How precisely should I be started? */
+ int (*start)(struct dst_node *);
+
+ /* Crypto capabilities */
+ struct dst_crypto_ctl crypto;
+ u8 *hash_key;
+ u8 *cipher_key;
+
+ /* Pool of processing thread */
+ struct thread_pool *pool;
+
+ /* Transaction IDs live here */
+ atomic_long_t gen;
+
+ /*
+ * How frequently and how many times transaction
+ * tree should be scanned to drop stale objects.
+ */
+ long trans_scan_timeout;
+ int trans_max_retries;
+
+ /* Small gnomes live here */
+ struct rb_root trans_root;
+ struct mutex trans_lock;
+
+ /*
+ * Transaction cache/memory pool.
+ * It is big enough to contain not only transaction
+ * itself, but additional crypto data (digest/hmac).
+ */
+ struct kmem_cache *trans_cache;
+ mempool_t *trans_pool;
+
+ /* This entity scans transaction tree */
+ struct delayed_work trans_work;
+
+ wait_queue_head_t wait;
+};
+
+/* Kernel representation of the security attribute */
+struct dst_secure
+{
+ struct list_head sec_entry;
+ struct dst_secure_user sec;
+};
+
+int dst_process_bio(struct dst_node *n, struct bio *bio);
+
+int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
+int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
+
+static inline struct dst_state *dst_state_get(struct dst_state *st)
+{
+ BUG_ON(atomic_read(&st->refcnt) == 0);
+ atomic_inc(&st->refcnt);
+ return st;
+}
+
+void dst_state_put(struct dst_state *st);
+
+struct dst_state *dst_state_alloc(struct dst_node *n);
+int dst_state_socket_create(struct dst_state *st);
+void dst_state_socket_release(struct dst_state *st);
+
+void dst_state_exit_connected(struct dst_state *st);
+
+int dst_state_schedule_receiver(struct dst_state *st);
+
+void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
+
+static inline void dst_state_lock(struct dst_state *st)
+{
+ mutex_lock(&st->state_lock);
+}
+
+static inline void dst_state_unlock(struct dst_state *st)
+{
+ mutex_unlock(&st->state_lock);
+}
+
+void dst_poll_exit(struct dst_state *st);
+int dst_poll_init(struct dst_state *st);
+
+static inline unsigned int dst_state_poll(struct dst_state *st)
+{
+ unsigned int revents = POLLHUP | POLLERR;
+
+ dst_state_lock(st);
+ if (st->socket)
+ revents = st->socket->ops->poll(NULL, st->socket, NULL);
+ dst_state_unlock(st);
+
+ return revents;
+}
+
+static inline int dst_thread_setup(void *private, void *data)
+{
+ return 0;
+}
+
+void dst_node_put(struct dst_node *n);
+
+static inline struct dst_node *dst_node_get(struct dst_node *n)
+{
+ atomic_inc(&n->refcnt);
+ return n;
+}
+
+int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
+int dst_recv_cdata(struct dst_state *st, void *cdata);
+int dst_data_send_header(struct socket *sock,
+ void *data, unsigned int size, int more);
+
+int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
+
+int dst_process_io(struct dst_state *st);
+int dst_export_crypto(struct dst_node *n, struct bio *bio);
+int dst_export_send_bio(struct bio *bio);
+int dst_start_export(struct dst_node *n);
+
+int __init dst_export_init(void);
+void dst_export_exit(void);
+
+/* Private structure for export block IO requests */
+struct dst_export_priv
+{
+ struct list_head request_entry;
+ struct dst_state *state;
+ struct bio *bio;
+ struct dst_cmd cmd;
+};
+
+static inline void dst_trans_get(struct dst_trans *t)
+{
+ atomic_inc(&t->refcnt);
+}
+
+struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
+int dst_trans_remove(struct dst_trans *t);
+int dst_trans_remove_nolock(struct dst_trans *t);
+void dst_trans_put(struct dst_trans *t);
+
+/*
+ * Convert bio into network command.
+ */
+static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
+ u32 command, u64 id)
+{
+ cmd->cmd = command;
+ cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
+ cmd->rw = bio->bi_rw;
+ cmd->size = bio->bi_size;
+ cmd->csize = 0;
+ cmd->id = id;
+ cmd->sector = bio->bi_sector;
+};
+
+int dst_trans_send(struct dst_trans *t);
+int dst_trans_crypto(struct dst_trans *t);
+
+int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
+void dst_node_crypto_exit(struct dst_node *n);
+
+static inline int dst_need_crypto(struct dst_node *n)
+{
+ struct dst_crypto_ctl *c = &n->crypto;
+ /*
+ * Logical OR is appropriate here, but boolean one produces
+ * more optimal code, so it is used instead.
+ */
+ return (c->hash_algo[0] | c->cipher_algo[0]);
+}
+
+int dst_node_trans_init(struct dst_node *n, unsigned int size);
+void dst_node_trans_exit(struct dst_node *n);
+
+/*
+ * Pool of threads.
+ * Ready list contains threads currently free to be used,
+ * active one contains threads with some work scheduled for them.
+ * Caller can wait in given queue when thread is ready.
+ */
+struct thread_pool
+{
+ int thread_num;
+ struct mutex thread_lock;
+ struct list_head ready_list, active_list;
+
+ wait_queue_head_t wait;
+};
+
+void thread_pool_del_worker(struct thread_pool *p);
+void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
+int thread_pool_add_worker(struct thread_pool *p,
+ char *name,
+ unsigned int id,
+ void *(* init)(void *data),
+ void (* cleanup)(void *data),
+ void *data);
+
+void thread_pool_destroy(struct thread_pool *p);
+struct thread_pool *thread_pool_create(int num, char *name,
+ void *(* init)(void *data),
+ void (* cleanup)(void *data),
+ void *data);
+
+int thread_pool_schedule(struct thread_pool *p,
+ int (* setup)(void *stored_private, void *setup_data),
+ int (* action)(void *stored_private, void *setup_data),
+ void *setup_data, long timeout);
+int thread_pool_schedule_private(struct thread_pool *p,
+ int (* setup)(void *private, void *data),
+ int (* action)(void *private, void *data),
+ void *data, long timeout, void *id);
+
+#endif /* __KERNEL__ */
+#endif /* __DST_H */