diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-22 16:25:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-22 16:25:25 -0700 |
commit | 0adfc56ce8fdc5c17630434e49f30536ba7b8559 (patch) | |
tree | ed63f34e74998a8a1550d4af61b3178e68a5d60d /include/linux | |
parent | f23eb2b2b28547fc70df82dd5049eb39bec5ba12 (diff) | |
parent | 59c2be1e4d42c0d4949cecdeef3f37070a1fbc13 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
rbd: use watch/notify for changes in rbd header
libceph: add lingering request and watch/notify event framework
rbd: update email address in Documentation
ceph: rename dentry_release -> d_release, fix comment
ceph: add request to the tail of unsafe write list
ceph: remove request from unsafe list if it is canceled/timed out
ceph: move readahead default to fs/ceph from libceph
ceph: add ino32 mount option
ceph: update common header files
ceph: remove debugfs debug cruft
libceph: fix osd request queuing on osdmap updates
ceph: preserve I_COMPLETE across rename
libceph: Fix base64-decoding when input ends in newline.
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 19 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/osd_client.h | 57 | ||||
-rw-r--r-- | include/linux/ceph/rados.h | 39 |
4 files changed, 100 insertions, 16 deletions
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 09dcc0c2ffd..b8e995fbd86 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -136,9 +136,18 @@ struct ceph_dir_layout { /* osd */ -#define CEPH_MSG_OSD_MAP 41 -#define CEPH_MSG_OSD_OP 42 -#define CEPH_MSG_OSD_OPREPLY 43 +#define CEPH_MSG_OSD_MAP 41 +#define CEPH_MSG_OSD_OP 42 +#define CEPH_MSG_OSD_OPREPLY 43 +#define CEPH_MSG_WATCH_NOTIFY 44 + + +/* watch-notify operations */ +enum { + WATCH_NOTIFY = 1, /* notifying watcher */ + WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */ +}; + /* pool operations */ enum { @@ -213,8 +222,10 @@ struct ceph_client_mount { struct ceph_mon_request_header monhdr; } __attribute__ ((packed)); +#define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */ + struct ceph_mon_subscribe_item { - __le64 have_version; __le64 have; + __le64 have_version; __le64 have; __u8 onetime; } __attribute__ ((packed)); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 72c72bfccb8..0d2e0fffb47 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -71,7 +71,6 @@ struct ceph_options { #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 -#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index a1af29648fb..f88eacb111d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -32,6 +32,7 @@ struct ceph_osd { struct rb_node o_node; struct ceph_connection o_con; struct list_head o_requests; + struct list_head o_linger_requests; struct list_head o_osd_lru; struct ceph_authorizer *o_authorizer; void *o_authorizer_buf, *o_authorizer_reply_buf; @@ -47,6 +48,8 @@ struct ceph_osd_request { struct rb_node r_node; struct list_head r_req_lru_item; struct list_head r_osd_item; + struct list_head r_linger_item; + struct list_head r_linger_osd; struct ceph_osd *r_osd; struct ceph_pg r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; @@ -59,6 +62,7 @@ struct ceph_osd_request { int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ int r_got_reply; + int r_linger; struct ceph_osd_client *r_osdc; struct kref r_kref; @@ -74,7 +78,6 @@ struct ceph_osd_request { char r_oid[40]; /* object name */ int r_oid_len; unsigned long r_stamp; /* send OR check time */ - bool r_resend; /* msg send failed, needs retry */ struct ceph_file_layout r_file_layout; struct ceph_snap_context *r_snapc; /* snap context for writes */ @@ -90,6 +93,26 @@ struct ceph_osd_request { struct ceph_pagelist *r_trail; /* trailing part of the data */ }; +struct ceph_osd_event { + u64 cookie; + int one_shot; + struct ceph_osd_client *osdc; + void (*cb)(u64, u64, u8, void *); + void *data; + struct rb_node node; + struct list_head osd_node; + struct kref kref; + struct completion completion; +}; + +struct ceph_osd_event_work { + struct work_struct work; + struct ceph_osd_event *event; + u64 ver; + u64 notify_id; + u8 opcode; +}; + struct ceph_osd_client { struct ceph_client *client; @@ -104,7 +127,10 @@ struct ceph_osd_client { u64 timeout_tid; /* tid of timeout triggering rq */ u64 last_tid; /* tid of last request */ struct rb_root requests; /* pending requests */ - struct list_head req_lru; /* pending requests lru */ + struct list_head req_lru; /* in-flight lru */ + struct list_head req_unsent; /* unsent/need-resend queue */ + struct list_head req_notarget; /* map to no osd */ + struct list_head req_linger; /* lingering requests */ int num_requests; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; @@ -116,6 +142,12 @@ struct ceph_osd_client { struct ceph_msgpool msgpool_op; struct ceph_msgpool msgpool_op_reply; + + spinlock_t event_lock; + struct rb_root event_tree; + u64 event_count; + + struct workqueue_struct *notify_wq; }; struct ceph_osd_req_op { @@ -150,6 +182,13 @@ struct ceph_osd_req_op { struct { u64 snapid; } snap; + struct { + u64 cookie; + u64 ver; + __u8 flag; + u32 prot_ver; + u32 timeout; + } watch; }; u32 payload_len; }; @@ -198,6 +237,11 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, bool use_mempool, int num_reply, int page_align); +extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); +extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); + static inline void ceph_osdc_get_request(struct ceph_osd_request *req) { kref_get(&req->r_kref); @@ -233,5 +277,14 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct page **pages, int nr_pages, int flags, int do_sync, bool nofail); +/* watch/notify events */ +extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, + void (*event_cb)(u64, u64, u8, void *), + int one_shot, void *data, + struct ceph_osd_event **pevent); +extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); +extern int ceph_osdc_wait_event(struct ceph_osd_event *event, + unsigned long timeout); +extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 6d5247f2e81..0a99099801a 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -12,9 +12,9 @@ * osdmap encoding versions */ #define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 5 +#define CEPH_OSDMAP_INC_VERSION_EXT 6 #define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 5 +#define CEPH_OSDMAP_VERSION_EXT 6 /* * fs id @@ -181,9 +181,17 @@ enum { /* read */ CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, + CEPH_OSD_OP_MAPEXT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3, /* fancy read */ - CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, + CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, + CEPH_OSD_OP_SPARSE_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 5, + + CEPH_OSD_OP_NOTIFY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 6, + CEPH_OSD_OP_NOTIFY_ACK = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 7, + + /* versioning */ + CEPH_OSD_OP_ASSERT_VER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 8, /* write */ CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, @@ -205,6 +213,8 @@ enum { CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, + CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -218,11 +228,14 @@ enum { CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, /** subop **/ - CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, - CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, - CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, - CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, - CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, + CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, + CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, + CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, + CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, + CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, + CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, + CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, + CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -328,6 +341,8 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; +#define RADOS_NOTIFY_VER 1 + /* * an individual object operation. each may be accompanied by some data * payload @@ -359,7 +374,12 @@ struct ceph_osd_op { struct { __le64 snapid; } __attribute__ ((packed)) snap; - }; + struct { + __le64 cookie; + __le64 ver; + __u8 flag; /* 0 = unwatch, 1 = watch */ + } __attribute__ ((packed)) watch; +}; __le32 payload_len; } __attribute__ ((packed)); @@ -402,4 +422,5 @@ struct ceph_osd_reply_head { } __attribute__ ((packed)); + #endif |