diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/autofs4/root.c | 5 | ||||
-rw-r--r-- | fs/cachefiles/internal.h | 1 | ||||
-rw-r--r-- | fs/cachefiles/namei.c | 98 | ||||
-rw-r--r-- | fs/cachefiles/security.c | 4 | ||||
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 19 | ||||
-rw-r--r-- | fs/ceph/inode.c | 4 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 34 | ||||
-rw-r--r-- | fs/ceph/messenger.c | 17 | ||||
-rw-r--r-- | fs/ceph/messenger.h | 1 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 26 | ||||
-rw-r--r-- | fs/ceph/osd_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 29 | ||||
-rw-r--r-- | fs/ceph/osdmap.h | 2 | ||||
-rw-r--r-- | fs/ceph/rados.h | 1 | ||||
-rw-r--r-- | fs/ceph/super.c | 23 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 1 | ||||
-rw-r--r-- | fs/cifs/inode.c | 21 | ||||
-rw-r--r-- | fs/compat.c | 2 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 6 | ||||
-rw-r--r-- | fs/proc/array.c | 3 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 19 |
23 files changed, 234 insertions, 93 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 109a6c606d9..e8e5e63ac95 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) } /* Trigger mount for path component or follow link */ } else if (ino->flags & AUTOFS_INF_PENDING || - autofs4_need_mount(flags) || - current->link_count) { + autofs4_need_mount(flags)) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); @@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); - status = try_to_fill_dentry(dentry, 0); + status = try_to_fill_dentry(dentry, nd->flags); if (status) goto out_error; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index f7c255f9c62..a8cd821226d 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -34,6 +34,7 @@ struct cachefiles_object { loff_t i_size; /* object size */ unsigned long flags; #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ +#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ atomic_t usage; /* object usage count */ uint8_t type; /* object type */ uint8_t new; /* T if object new */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d5db84a1ee0..f4a7840bf42 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, } /* + * mark the owner of a dentry, if there is one, to indicate that that dentry + * has been preemptively deleted + * - the caller must hold the i_mutex on the dentry's parent as required to + * call vfs_unlink(), vfs_rmdir() or vfs_rename() + */ +static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + struct cachefiles_object *object; + struct rb_node *p; + + _enter(",'%*.*s'", + dentry->d_name.len, dentry->d_name.len, dentry->d_name.name); + + write_lock(&cache->active_lock); + + p = cache->active_nodes.rb_node; + while (p) { + object = rb_entry(p, struct cachefiles_object, active_node); + if (object->dentry > dentry) + p = p->rb_left; + else if (object->dentry < dentry) + p = p->rb_right; + else + goto found_dentry; + } + + write_unlock(&cache->active_lock); + _leave(" [no owner]"); + return; + + /* found the dentry for */ +found_dentry: + kdebug("preemptive burial: OBJ%x [%s] %p", + object->fscache.debug_id, + fscache_object_states[object->fscache.state], + dentry); + + if (object->fscache.state < FSCACHE_OBJECT_DYING) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error:" + " Can't preemptively bury live object\n"); + cachefiles_printk_object(object, NULL); + } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + printk(KERN_ERR "CacheFiles: Error:" + " Object already preemptively buried\n"); + } + + write_unlock(&cache->active_lock); + _leave(" [owner marked]"); +} + +/* * record the fact that an object is now active */ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, @@ -219,7 +272,8 @@ requeue: */ static int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *dir, - struct dentry *rep) + struct dentry *rep, + bool preemptive) { struct dentry *grave, *trap; char nbuffer[8 + 8 + 1]; @@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, dir->d_name.len, dir->d_name.len, dir->d_name.name, rep->d_name.len, rep->d_name.len, rep->d_name.name); + _debug("remove %p from %p", rep, dir); + /* non-directories can just be unlinked */ if (!S_ISDIR(rep->d_inode->i_mode)) { _debug("unlink stale object"); ret = vfs_unlink(dir->d_inode, rep); + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + mutex_unlock(&dir->d_inode->i_mutex); if (ret == -EIO) @@ -325,6 +384,9 @@ try_again: if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + unlock_rename(cache->graveyard, dir); dput(grave); _leave(" = 0"); @@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, struct dentry *dir; int ret; - _enter(",{%p}", object->dentry); + _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); ASSERT(object->dentry); ASSERT(object->dentry->d_inode); @@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - /* we need to check that our parent is _still_ our parent - it may have - * been renamed */ - if (dir == object->dentry->d_parent) { - ret = cachefiles_bury_object(cache, dir, object->dentry); - } else { - /* it got moved, presumably by cachefilesd culling it, so it's - * no longer in the key path and we can ignore it */ + if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + /* object allocation for the same key preemptively deleted this + * object's file so that it could create its own file */ + _debug("object preemptively buried"); mutex_unlock(&dir->d_inode->i_mutex); ret = 0; + } else { + /* we need to check that our parent is _still_ our parent - it + * may have been renamed */ + if (dir == object->dentry->d_parent) { + ret = cachefiles_bury_object(cache, dir, + object->dentry, false); + } else { + /* it got moved, presumably by cachefilesd culling it, + * so it's no longer in the key path and we can ignore + * it */ + mutex_unlock(&dir->d_inode->i_mutex); + ret = 0; + } } dput(dir); @@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, const char *name; int ret, nlen; - _enter("{%p},,%s,", parent->dentry, key); + _enter("OBJ%x{%p},OBJ%x,%s,", + parent->fscache.debug_id, parent->dentry, + object->fscache.debug_id, key); cache = container_of(parent->fscache.cache, struct cachefiles_cache, cache); @@ -509,7 +583,7 @@ lookup_again: * mutex) */ object->dentry = NULL; - ret = cachefiles_bury_object(cache, dir, next); + ret = cachefiles_bury_object(cache, dir, next, true); dput(next); next = NULL; @@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, /* actually remove the victim (drops the dir mutex) */ _debug("bury"); - ret = cachefiles_bury_object(cache, dir, victim); + ret = cachefiles_bury_object(cache, dir, victim, false); if (ret < 0) goto error; diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index b5808cdb223..039b5011d83 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, /* * check the security details of the on-disk cache * - must be called with security override in force + * - must return with a security override in force - even in the case of an + * error */ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, struct dentry *root, @@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, * which create files */ ret = set_create_files_as(new, root->d_inode); if (ret < 0) { + abort_creds(new); + cachefiles_begin_secure(cache, _saved_cred); _leave(" = %d [cfa]", ret); return ret; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b42c2bb603..a9005d862ed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req, int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct writeback_control *wbc = req->r_wbc; __s32 rc = -EIO; u64 bytes = 0; struct ceph_client *client = ceph_inode_to_client(inode); @@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); - if (i >= wrote) { - dout("inode %p skipping page %p\n", inode, page); - wbc->pages_skipped++; - } ceph_put_snap_context((void *)page->private); page->private = 0; ClearPagePrivate(page); @@ -799,7 +794,6 @@ get_more_pages: alloc_page_vec(client, req); req->r_callback = writepages_finish; req->r_inode = inode; - req->r_wbc = wbc; } /* note position of first page in pvec */ diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0c168180686..d9400534b27 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) } /* + * Remove a cap. Take steps to deal with a racing iterate_session_caps. + * * caller should hold i_lock. * caller will not hold session s_mutex if called from destroy_inode. */ @@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - cap->ci = NULL; - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) list_del_init(&cap->session_caps); session->s_nr_caps--; cap->session = NULL; + removed = 1; } + /* protect backpointer with s_cap_lock: see iterate_session_caps */ + cap->ci = NULL; spin_unlock(&session->s_cap_lock); - if (cap->session == NULL) + /* remove from inode list */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + + if (removed) ceph_put_cap(cap); if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 261f3e6c0bc..85b4d2ffdeb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -733,6 +733,10 @@ no_change: __ceph_get_fmode(ci, cap_fmode); spin_unlock(&inode->i_lock); } + } else if (cap_fmode >= 0) { + pr_warning("mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); + __ceph_get_fmode(ci, cap_fmode); } /* update delegation info? */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47b..24561a557e0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) } /* - * Helper to safely iterate over all caps associated with a session. + * Helper to safely iterate over all caps associated with a session, with + * special care taken to handle a racing __ceph_remove_cap(). * - * caller must hold session s_mutex + * Caller must hold session s_mutex. */ static int iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, struct ceph_cap *, @@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) struct ceph_mds_session *session = NULL; struct ceph_msg *reply; struct rb_node *p; - int err; + int err = -ENOMEM; struct ceph_pagelist *pagelist; pr_info("reconnect to recovering mds%d\n", mds); @@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) goto fail; err = iterate_session_caps(session, encode_caps_cb, pagelist); if (err < 0) - goto out; + goto fail; /* * snaprealms. we provide mds with the ino, seq (version), and @@ -2213,28 +2214,31 @@ send: reply->nr_pages = calc_pages_for(0, pagelist->length); ceph_con_send(&session->s_con, reply); - if (session) { - session->s_state = CEPH_MDS_SESSION_OPEN; - __wake_requests(mdsc, &session->s_waiting); - } + session->s_state = CEPH_MDS_SESSION_OPEN; + mutex_unlock(&session->s_mutex); + + mutex_lock(&mdsc->mutex); + __wake_requests(mdsc, &session->s_waiting); + mutex_unlock(&mdsc->mutex); + + ceph_put_mds_session(session); -out: up_read(&mdsc->snap_rwsem); - if (session) { - mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); - } mutex_lock(&mdsc->mutex); return; fail: ceph_msg_put(reply); + up_read(&mdsc->snap_rwsem); + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); fail_nomsg: ceph_pagelist_release(pagelist); kfree(pagelist); fail_nopagelist: - pr_err("ENOMEM preparing reconnect for mds%d\n", mds); - goto out; + pr_err("error %d preparing reconnect for mds%d\n", err, mds); + mutex_lock(&mdsc->mutex); + return; } diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb..cd4fadb6491 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) list_move_tail(&m->list_head, &con->out_sent); } - m->hdr.seq = cpu_to_le64(++con->out_seq); + /* + * only assign outgoing seq # if we haven't sent this message + * yet. if it is requeued, resend with it's original seq. + */ + if (m->needs_out_seq) { + m->hdr.seq = cpu_to_le64(++con->out_seq); + m->needs_out_seq = false; + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), @@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); + msg->needs_out_seq = true; + /* queue */ mutex_lock(&con->mutex); BUG_ON(!list_empty(&msg->list_head)); @@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, kref_init(&m->kref); INIT_LIST_HEAD(&m->list_head); + m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); + m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; m->hdr.data_len = cpu_to_le32(page_len); m->hdr.data_off = cpu_to_le16(page_off); - m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; m->footer.data_crc = 0; + m->footer.flags = 0; m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cd..a5caf91cc97 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -86,6 +86,7 @@ struct ceph_msg { struct kref kref; bool front_is_vmalloc; bool more_to_follow; + bool needs_out_seq; int front_max; struct ceph_msgpool *pool; diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace..3514f71ff85 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; - int o = -1; + int acting[CEPH_PG_MAX_SIZE]; + int o = -1, num = 0; int err; dout("map_osds %p tid %lld\n", req, req->r_tid); @@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; - o = ceph_calc_pg_primary(osdc->osdmap, pgid); + err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); + if (err > 0) { + o = acting[0]; + num = err; + } if ((req->r_osd && req->r_osd->o_osd == o && - req->r_sent >= req->r_osd->o_incarnation) || + req->r_sent >= req->r_osd->o_incarnation && + req->r_num_pg_osds == num && + memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || (req->r_osd == NULL && o == -1)) return 0; /* no change */ @@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, req->r_osd ? req->r_osd->o_osd : -1); + /* record full pg acting set */ + memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); + req->r_num_pg_osds = num; + if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); @@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, __remove_osd_from_lru(req->r_osd); list_add(&req->r_osd_item, &req->r_osd->o_requests); } - err = 1; /* osd changed */ + err = 1; /* osd or pg changed */ out: return err; @@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_osd_request *req; u64 tid; int numops, object_len, flags; + s32 result; tid = le64_to_cpu(msg->hdr.tid); if (msg->front.iov_len < sizeof(*rhead)) goto bad; numops = le32_to_cpu(rhead->num_ops); object_len = le32_to_cpu(rhead->object_len); + result = le32_to_cpu(rhead->result); if (msg->front.iov_len != sizeof(*rhead) + object_len + numops * sizeof(struct ceph_osd_op)) goto bad; - dout("handle_reply %p tid %llu\n", msg, tid); + dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); /* lookup */ mutex_lock(&osdc->request_mutex); @@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("handle_reply tid %llu flags %d\n", tid, flags); /* either this is a read, or we got the safe response */ - if ((flags & CEPH_OSD_FLAG_ONDISK) || + if (result < 0 || + (flags & CEPH_OSD_FLAG_ONDISK) || ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c..ce776989ef6 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -48,6 +48,8 @@ struct ceph_osd_request { struct list_head r_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; + int r_pg_osds[CEPH_PG_MAX_SIZE]; + int r_num_pg_osds; struct ceph_connection *r_con_filling_msg; @@ -66,7 +68,6 @@ struct ceph_osd_request { struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ - struct writeback_control *r_wbc; /* ditto */ char r_oid[40]; /* object name */ int r_oid_len; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 2e2c15eed82..cfdd8f4388b 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* + * Return acting set for given pgid. + */ +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting) +{ + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, o, num = CEPH_PG_MAX_SIZE; + + osds = calc_pg_raw(osdmap, pgid, rawosds, &num); + if (!osds) + return -1; + + /* primary is first up osd */ + o = 0; + for (i = 0; i < num; i++) + if (ceph_osd_is_up(osdmap, osds[i])) + acting[o++] = osds[i]; + return o; +} + +/* * Return primary osd for given pgid, or -1 if none. */ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) { - int rawosds[10], *osds; - int i, num = ARRAY_SIZE(rawosds); + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, num = CEPH_PG_MAX_SIZE; osds = calc_pg_raw(osdmap, pgid, rawosds, &num); if (!osds) @@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) /* primary is first up osd */ for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) { + if (ceph_osd_is_up(osdmap, osds[i])) return osds[i]; - break; - } return -1; } diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 8bc9f1e4f56..970b547e510 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h @@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a1fc1d017b5..fd56451a871 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -58,6 +58,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 +#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ /* * placement group. diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7..110857ba926 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) */ static void ceph_put_super(struct super_block *s) { - struct ceph_client *cl = ceph_client(s); + struct ceph_client *client = ceph_sb_to_client(s); dout("put_super\n"); - ceph_mdsc_close_sessions(&cl->mdsc); + ceph_mdsc_close_sessions(&client->mdsc); + + /* + * ensure we release the bdi before put_anon_super releases + * the device name. + */ + if (s->s_bdi == &client->backing_dev_info) { + bdi_unregister(&client->backing_dev_info); + s->s_bdi = NULL; + } + return; } @@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) destroy_workqueue(client->pg_inv_wq); destroy_workqueue(client->trunc_wq); + bdi_destroy(&client->backing_dev_info); + if (client->msgr) ceph_messenger_destroy(client->msgr); mempool_destroy(client->wb_pagevec_pool); @@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { int err; - sb->s_bdi = &client->backing_dev_info; - /* set ra_pages based on rsize mount option? */ if (client->mount_args->rsize >= PAGE_CACHE_SIZE) client->backing_dev_info.ra_pages = (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); + if (!err) + sb->s_bdi = &client->backing_dev_info; return err; } @@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(&client->mdsc); kill_anon_super(s); /* will call put_super after sb is r/o */ - if (s->s_bdi == &client->backing_dev_info) - bdi_unregister(&client->backing_dev_info); - bdi_destroy(&client->backing_dev_info); ceph_destroy_client(client); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ecf0ffbe2b6..0c2fd17439c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -502,6 +502,7 @@ struct dfs_info3_param { #define CIFS_FATTR_DFS_REFERRAL 0x1 #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 +#define CIFS_FATTR_INO_COLLISION 0x8 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35ec1171621..29b9ea244c8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque) if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; + /* + * uh oh -- it's a directory. We can't use it since hardlinked dirs are + * verboten. Disable serverino and return it as if it were found, the + * caller can discard it, generate a uniqueid and retry the find + */ + if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { + fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; + cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); + } + return 1; } @@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) unsigned long hash; struct inode *inode; +retry_iget5_locked: cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); /* hash down to 32-bits on 32-bit arch */ hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); - - /* we have fattrs in hand, update the inode */ if (inode) { + /* was there a problematic inode number collision? */ + if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { + iput(inode); + fattr->cf_uniqueid = iunique(sb, ROOT_I); + fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; + goto retry_iget5_locked; + } + cifs_fattr_to_inode(inode, fattr); if (sb->s_flags & MS_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; diff --git a/fs/compat.c b/fs/compat.c index 4b6ed03cc47..05448730f84 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/exec.c b/fs/exec.c index 49cdaa19e5b..e6e94c626c2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1387,8 +1387,6 @@ int do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/namei.c b/fs/namei.c index a7dce91a7e4..16df7277a92 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) { if (open_flag & O_CREAT) goto exit; - nd->flags |= LOOKUP_DIRECTORY; + nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; } /* just plain open? */ @@ -1830,6 +1830,8 @@ reval: } if (open_flag & O_DIRECTORY) nd.flags |= LOOKUP_DIRECTORY; + if (!(open_flag & O_NOFOLLOW)) + nd.flags |= LOOKUP_FOLLOW; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; @@ -1837,7 +1839,7 @@ reval: void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; diff --git a/fs/proc/array.c b/fs/proc/array.c index e51f2ec2c5e..885ab5513ac 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -81,7 +81,6 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/tracehook.h> -#include <linux/swapops.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - (permitted && mm) ? task->stack_start : 0, + (permitted && mm) ? mm->start_stack : 0, esp, eip, /* The signal information here is obsolete. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 070553427dd..47f5b145f56 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { name = "[stack]"; - } else { - unsigned long stack_start; - struct proc_maps_private *pmp; - - pmp = m->private; - stack_start = pmp->task->stack_start; - - if (vma->vm_start <= stack_start && - vma->vm_end >= stack_start) { - pad_len_spaces(m, len); - seq_printf(m, - "[threadstack:%08lx]", -#ifdef CONFIG_STACK_GROWSUP - vma->vm_end - stack_start -#else - stack_start - vma->vm_start -#endif - ); - } } } else { name = "[vdso]"; |