summaryrefslogtreecommitdiffstats
path: root/include/linux/ceph/osd_client.h
blob: 1dd5d466b6f9f01980c8e3efb7d905395f6b3fde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H

#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/mempool.h>
#include <linux/rbtree.h>

#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>

/*
 * Maximum object name size
 * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100)
 *
 * This is the size of the r_oid buffer in struct ceph_osd_request.
 */
#define MAX_OBJ_NAME_SIZE 100

/*
 * Forward declarations, so the declarations below can refer to these
 * struct types by pointer.
 */
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;
struct ceph_authorizer;

/*
 * completion callback for async writepages
 *
 * Called with the request and a message; this is the type of the
 * r_callback and r_safe_callback members of struct ceph_osd_request.
 * NOTE(review): the message is presumably the osd's reply -- confirm
 * against the caller in osd_client.c.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
				     struct ceph_msg *);

/*
 * a given osd we're communicating with
 *
 * Embeds the messenger connection (o_con) and the auth handshake state
 * (o_auth) for this osd, along with rb-tree and list linkage.
 */
struct ceph_osd {
	/* NOTE(review): looks like a reference count -- confirm */
	atomic_t o_ref;
	/* the osd client this osd is associated with */
	struct ceph_osd_client *o_osdc;
	int o_osd;
	int o_incarnation;
	/* rb-tree linkage; presumably into ceph_osd_client.osds -- confirm */
	struct rb_node o_node;
	struct ceph_connection o_con;
	/*
	 * presumably the heads that ceph_osd_request.r_osd_item and
	 * ceph_osd_request.r_linger_osd are linked on -- confirm
	 */
	struct list_head o_requests;
	struct list_head o_linger_requests;
	/* list linkage; presumably onto ceph_osd_client.osd_lru -- confirm */
	struct list_head o_osd_lru;
	struct ceph_auth_handshake o_auth;
	unsigned long lru_ttl;
	int o_marked_for_keepalive;
	struct list_head o_keepalive_item;
};


/*
 * Number of entries in the r_reply_op_len[] and r_reply_op_result[]
 * arrays of struct ceph_osd_request.
 */
#define CEPH_OSD_MAX_OP 10

/*
 * an in-flight request
 *
 * Reference counted through r_kref; see ceph_osdc_get_request() and
 * ceph_osdc_put_request().
 */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	/* rb-tree and list linkage for this request */
	struct rb_node  r_node;
	struct list_head r_req_lru_item;
	struct list_head r_osd_item;
	struct list_head r_linger_item;
	struct list_head r_linger_osd;
	struct ceph_osd *r_osd;
	struct ceph_pg   r_pgid;
	/* r_num_pg_osds presumably counts valid r_pg_osds[] entries -- confirm */
	int              r_pg_osds[CEPH_PG_MAX_SIZE];
	int              r_num_pg_osds;

	struct ceph_connection *r_con_filling_msg;

	/* the request message and the reply message */
	struct ceph_msg  *r_request, *r_reply;
	int               r_flags;     /* any additional flags for the osd */
	u32               r_sent;      /* >0 if r_request is sending/sent */
	int               r_num_ops;

	/* encoded message content */
	struct ceph_osd_op *r_request_ops;
	/* these are updated on each send */
	__le32           *r_request_osdmap_epoch;
	__le32           *r_request_flags;
	__le64           *r_request_pool;
	void             *r_request_pgid;
	__le32           *r_request_attempts;
	struct ceph_eversion *r_request_reassert_version;

	int               r_result;
	/* reply arrays, CEPH_OSD_MAX_OP entries each */
	int               r_reply_op_len[CEPH_OSD_MAX_OP];
	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
	int               r_got_reply;
	int		  r_linger;

	struct ceph_osd_client *r_osdc;
	struct kref       r_kref;      /* see ceph_osdc_{get,put}_request() */
	/* NOTE(review): presumably set when allocated from a mempool -- confirm */
	bool              r_mempool;
	struct completion r_completion, r_safe_completion;
	ceph_osdc_callback_t r_callback, r_safe_callback;
	struct ceph_eversion r_reassert_version;
	struct list_head  r_unsafe_item;

	struct inode *r_inode;         	      /* for use by callbacks */
	void *r_priv;			      /* ditto */

	char              r_oid[MAX_OBJ_NAME_SIZE];          /* object name */
	int               r_oid_len;
	u64               r_snapid;
	unsigned long     r_stamp;            /* send OR check time */

	struct ceph_file_layout r_file_layout;
	struct ceph_snap_context *r_snapc;    /* snap context for writes */
	unsigned          r_num_pages;        /* size of page array (follows) */
	unsigned          r_page_alignment;   /* io offset in first page */
	struct page     **r_pages;            /* pages for data payload */
	int               r_pages_from_pool;
	int               r_own_pages;        /* if true, i own page list */
	/* r_bio exists only when CONFIG_BLOCK is set */
#ifdef CONFIG_BLOCK
	struct bio       *r_bio;	      /* instead of pages */
#endif

	struct ceph_pagelist r_trail;	      /* trailing part of the data */
};

/*
 * A watch/notify event: a cookie, a callback with its opaque data
 * pointer, and rb-tree / list linkage.  Reference counted via kref.
 *
 * The cb signature matches the event_cb argument of
 * ceph_osdc_create_event().
 */
struct ceph_osd_event {
	u64 cookie;
	int one_shot;
	struct ceph_osd_client *osdc;
	/*
	 * NOTE(review): the (u64, u64, u8) arguments presumably correspond
	 * to ver, notify_id and opcode in struct ceph_osd_event_work, with
	 * the void * being @data -- confirm against osd_client.c.
	 */
	void (*cb)(u64, u64, u8, void *);
	void *data;
	/* rb-tree linkage; presumably ceph_osd_client.event_tree -- confirm */
	struct rb_node node;
	struct list_head osd_node;
	struct kref kref;
};

/*
 * Work item that pairs a ceph_osd_event with the ver / notify_id /
 * opcode values that go with it.
 * NOTE(review): presumably queued on ceph_osd_client.notify_wq -- confirm.
 */
struct ceph_osd_event_work {
	struct work_struct work;
	struct ceph_osd_event *event;
	u64 ver;
	u64 notify_id;
	u8 opcode;
};

/*
 * Per-client osd state: the current osdmap, the set of osds, the
 * requests on their various queues, message pools, and watch/notify
 * event bookkeeping.
 */
struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
	/* NOTE(review): map_sem presumably guards osdmap -- confirm */
	struct rw_semaphore    map_sem;
	struct completion      map_waiters;
	u64                    last_requested_map;

	/* NOTE(review): presumably guards the request/osd state below -- confirm */
	struct mutex           request_mutex;
	struct rb_root         osds;          /* osds */
	struct list_head       osd_lru;       /* idle osds */
	u64                    timeout_tid;   /* tid of timeout triggering rq */
	u64                    last_tid;      /* tid of last request */
	struct rb_root         requests;      /* pending requests */
	struct list_head       req_lru;	      /* in-flight lru */
	struct list_head       req_unsent;    /* unsent/need-resend queue */
	struct list_head       req_notarget;  /* map to no osd */
	struct list_head       req_linger;    /* lingering requests */
	int                    num_requests;
	struct delayed_work    timeout_work;
	struct delayed_work    osds_timeout_work;
	/* debugfs_file exists only when CONFIG_DEBUG_FS is set */
#ifdef CONFIG_DEBUG_FS
	struct dentry 	       *debugfs_file;
#endif

	/* NOTE(review): presumably backs ceph_osd_request.r_mempool -- confirm */
	mempool_t              *req_mempool;

	/* message pools; by name, one for ops and one for op replies */
	struct ceph_msgpool	msgpool_op;
	struct ceph_msgpool	msgpool_op_reply;

	/* watch/notify events (see struct ceph_osd_event) */
	spinlock_t		event_lock;
	struct rb_root		event_tree;
	u64			event_count;

	struct workqueue_struct	*notify_wq;
};

/*
 * One osd operation as supplied by a caller: an opcode, a payload
 * length, and opcode-specific arguments held in an anonymous union.
 * NOTE(review): which union member is meaningful presumably depends on
 * @op -- confirm against the users of this struct.
 */
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
	u32 payload_len;
	union {
		/* extent arguments */
		struct {
			u64 offset;
			u64 length;
			u64 truncate_size;
			u32 truncate_seq;
		} extent;
		/* xattr arguments */
		struct {
			const char *name;
			const char *val;
			u32 name_len;
			u32 value_len;
			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
		} xattr;
		/* class method arguments */
		struct {
			const char *class_name;
			const char *method_name;
			const char *indata;
			u32 indata_len;
			__u8 class_len;
			__u8 method_len;
			__u8 argc;
		} cls;
		/* pgls arguments */
		struct {
			u64 cookie;
			u64 count;
		} pgls;
		/* snap arguments */
		struct {
			u64 snapid;
		} snap;
		/* watch arguments */
		struct {
			u64 cookie;
			u64 ver;
			u32 prot_ver;
			u32 timeout;
			__u8 flag;
		} watch;
	};
};

/*
 * Set up / shut down an osd client.
 *
 * ceph_osdc_init() takes the ceph_osd_client to operate on and the
 * ceph_client it is associated with, and returns an int.
 * NOTE(review): presumably 0 on success and a negative errno on
 * failure -- confirm in osd_client.c.
 */
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);

/*
 * Incoming-message handlers: each takes the osd client and a message.
 * NOTE(review): by name these handle an osd reply and an osdmap update
 * respectively -- confirm in osd_client.c.
 */
extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);

/*
 * Allocate a struct ceph_osd_request.
 *
 * @osdc:        osd client
 * @snapc:       snap context
 * @num_op:      number of ops
 * @use_mempool: NOTE(review): presumably selects allocation from
 *               osdc->req_mempool -- confirm
 * @gfp_flags:   allocation flags
 *
 * Returns a pointer to the request.  NOTE(review): the failure
 * convention (NULL vs ERR_PTR) is not visible here -- check the
 * definition before testing the result.
 */
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       struct ceph_snap_context *snapc,
					       unsigned int num_op,
					       bool use_mempool,
					       gfp_t gfp_flags);

/*
 * Fill in an already-allocated request.
 *
 * @req:     request to fill in
 * @off:     offset
 * @len:     length
 * @num_op:  number of entries in @src_ops
 * @src_ops: ops, described as struct ceph_osd_req_op
 * @snapc:   snap context
 * @snap_id: snapshot id
 * @mtime:   modification time
 *
 * NOTE(review): parameter meanings are read off the names and types;
 * confirm details against the definition in osd_client.c.
 */
extern void ceph_osdc_build_request(struct ceph_osd_request *req,
				    u64 off, u64 len,
				    unsigned int num_op,
				    struct ceph_osd_req_op *src_ops,
				    struct ceph_snap_context *snapc,
				    u64 snap_id,
				    struct timespec *mtime);

/*
 * Create a new osd request from a file layout and a vino.
 *
 * @osdc:          osd client
 * @layout:        file layout
 * @vino:          identifies the file (passed by value)
 * @offset:        offset of the io
 * @len:           length of the io, passed by pointer.
 *                 NOTE(review): presumably the callee may shorten it --
 *                 callers should re-read *len afterwards; confirm.
 * @op:            opcode
 * @flags:         request flags
 * @snapc:         snap context
 * @do_sync:       NOTE(review): meaning not visible here -- confirm
 * @truncate_seq:  truncate sequence
 * @truncate_size: truncate size
 * @mtime:         modification time
 * @use_mempool:   NOTE(review): presumably allocate from the request
 *                 mempool -- confirm
 * @page_align:    page alignment of the data
 *
 * Returns a pointer to the request.  NOTE(review): the failure
 * convention (NULL vs ERR_PTR) is not visible here -- check the
 * definition before testing the result.
 */
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
				      u64 offset, u64 *len, int op, int flags,
				      struct ceph_snap_context *snapc,
				      int do_sync, u32 truncate_seq,
				      u64 truncate_size,
				      struct timespec *mtime,
				      bool use_mempool, int page_align);

/*
 * Lingering requests: mark @req as lingering, and later unregister it.
 * NOTE(review): presumably these manage membership of
 * ceph_osd_client.req_linger -- confirm in osd_client.c.
 */
extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
					 struct ceph_osd_request *req);
extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
						struct ceph_osd_request *req);

/*
 * Take an additional reference on @req by calling kref_get() on its
 * r_kref.
 */
static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
{
	struct kref *ref = &req->r_kref;

	kref_get(ref);
}
/* release function handed to kref_put() by ceph_osdc_put_request() */
extern void ceph_osdc_release_request(struct kref *kref);

/*
 * Drop a reference on @req: calls kref_put() on its r_kref with
 * ceph_osdc_release_request() as the release function.
 */
static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
{
	struct kref *ref = &req->r_kref;

	kref_put(ref, ceph_osdc_release_request);
}

/*
 * Submit a request, wait for one, and sync.
 *
 * ceph_osdc_start_request() and ceph_osdc_wait_request() return int.
 * NOTE(review): presumably 0 / negative errno; the exact meaning of
 * @nofail is not visible here -- confirm in osd_client.c.
 */
extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
				   struct ceph_osd_request *req,
				   bool nofail);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);

/*
 * Read into an array of pages.
 *
 * @osdc:          osd client
 * @vino:          identifies the file (passed by value)
 * @layout:        file layout
 * @off:           offset of the read
 * @plen:          length, passed by pointer.
 *                 NOTE(review): presumably updated by the callee --
 *                 confirm.
 * @truncate_seq:  truncate sequence
 * @truncate_size: truncate size
 * @pages:         page array receiving the data
 * @nr_pages:      number of entries in @pages
 * @page_align:    page alignment of the data
 *
 * Returns an int.  NOTE(review): presumably bytes read or a negative
 * errno -- confirm in osd_client.c.
 */
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			       struct ceph_vino vino,
			       struct ceph_file_layout *layout,
			       u64 off, u64 *plen,
			       u32 truncate_seq, u64 truncate_size,
			       struct page **pages, int nr_pages,
			       int page_align);

/*
 * Write from an array of pages.
 *
 * @osdc:          osd client
 * @vino:          identifies the file (passed by value)
 * @layout:        file layout
 * @sc:            snap context
 * @off:           offset of the write
 * @len:           length of the write (by value, unlike the @plen
 *                 pointer taken by ceph_osdc_readpages())
 * @truncate_seq:  truncate sequence
 * @truncate_size: truncate size
 * @mtime:         modification time
 * @pages:         page array holding the data
 * @nr_pages:      number of entries in @pages
 *
 * Returns an int.  NOTE(review): presumably bytes written or a
 * negative errno -- confirm in osd_client.c.
 */
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
				struct ceph_vino vino,
				struct ceph_file_layout *layout,
				struct ceph_snap_context *sc,
				u64 off, u64 len,
				u32 truncate_seq, u64 truncate_size,
				struct timespec *mtime,
				struct page **pages, int nr_pages);

/*
 * watch/notify events
 *
 * ceph_osdc_create_event() takes a callback and an opaque @data
 * pointer and hands the new event back through @pevent; it returns an
 * int.  The callback type matches struct ceph_osd_event's cb member.
 * NOTE(review): presumably 0 / negative errno, and the event is
 * presumably released with ceph_osdc_put_event() -- confirm in
 * osd_client.c.
 */
extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
				  void (*event_cb)(u64, u64, u8, void *),
				  void *data, struct ceph_osd_event **pevent);
extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
extern void ceph_osdc_put_event(struct ceph_osd_event *event);
#endif