summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c5
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c98
-rw-r--r--fs/cachefiles/security.c4
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/ceph/caps.c19
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/messenger.c17
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c26
-rw-r--r--fs/ceph/osd_client.h3
-rw-r--r--fs/ceph/osdmap.c29
-rw-r--r--fs/ceph/osdmap.h2
-rw-r--r--fs/ceph/rados.h1
-rw-r--r--fs/ceph/super.c23
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/inode.c21
-rw-r--r--fs/compat.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/namei.c6
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/task_mmu.c19
23 files changed, 234 insertions, 93 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d9..e8e5e63ac95 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
}
/* Trigger mount for path component or follow link */
} else if (ino->flags & AUTOFS_INF_PENDING ||
- autofs4_need_mount(flags) ||
- current->link_count) {
+ autofs4_need_mount(flags)) {
DPRINTK("waiting for mount name=%.*s",
dentry->d_name.len, dentry->d_name.name);
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
- status = try_to_fill_dentry(dentry, 0);
+ status = try_to_fill_dentry(dentry, nd->flags);
if (status)
goto out_error;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c62..a8cd821226d 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
loff_t i_size; /* object size */
unsigned long flags;
#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
+#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
atomic_t usage; /* object usage count */
uint8_t type; /* object type */
uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0..f4a7840bf42 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
}
/*
+ * mark the owner of a dentry, if there is one, to indicate that that dentry
+ * has been preemptively deleted
+ * - the caller must hold the i_mutex on the dentry's parent as required to
+ * call vfs_unlink(), vfs_rmdir() or vfs_rename()
+ */
+static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
+ struct dentry *dentry)
+{
+ struct cachefiles_object *object;
+ struct rb_node *p;
+
+ _enter(",'%*.*s'",
+ dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+
+ write_lock(&cache->active_lock);
+
+ p = cache->active_nodes.rb_node;
+ while (p) {
+ object = rb_entry(p, struct cachefiles_object, active_node);
+ if (object->dentry > dentry)
+ p = p->rb_left;
+ else if (object->dentry < dentry)
+ p = p->rb_right;
+ else
+ goto found_dentry;
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [no owner]");
+ return;
+
+ /* found the dentry for */
+found_dentry:
+ kdebug("preemptive burial: OBJ%x [%s] %p",
+ object->fscache.debug_id,
+ fscache_object_states[object->fscache.state],
+ dentry);
+
+ if (object->fscache.state < FSCACHE_OBJECT_DYING) {
+ printk(KERN_ERR "\n");
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Can't preemptively bury live object\n");
+ cachefiles_printk_object(object, NULL);
+ } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Object already preemptively buried\n");
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [owner marked]");
+}
+
+/*
* record the fact that an object is now active
*/
static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
*/
static int cachefiles_bury_object(struct cachefiles_cache *cache,
struct dentry *dir,
- struct dentry *rep)
+ struct dentry *rep,
+ bool preemptive)
{
struct dentry *grave, *trap;
char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
dir->d_name.len, dir->d_name.len, dir->d_name.name,
rep->d_name.len, rep->d_name.len, rep->d_name.name);
+ _debug("remove %p from %p", rep, dir);
+
/* non-directories can just be unlinked */
if (!S_ISDIR(rep->d_inode->i_mode)) {
_debug("unlink stale object");
ret = vfs_unlink(dir->d_inode, rep);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
mutex_unlock(&dir->d_inode->i_mutex);
if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
unlock_rename(cache->graveyard, dir);
dput(grave);
_leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
struct dentry *dir;
int ret;
- _enter(",{%p}", object->dentry);
+ _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
ASSERT(object->dentry);
ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- /* we need to check that our parent is _still_ our parent - it may have
- * been renamed */
- if (dir == object->dentry->d_parent) {
- ret = cachefiles_bury_object(cache, dir, object->dentry);
- } else {
- /* it got moved, presumably by cachefilesd culling it, so it's
- * no longer in the key path and we can ignore it */
+ if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ /* object allocation for the same key preemptively deleted this
+ * object's file so that it could create its own file */
+ _debug("object preemptively buried");
mutex_unlock(&dir->d_inode->i_mutex);
ret = 0;
+ } else {
+ /* we need to check that our parent is _still_ our parent - it
+ * may have been renamed */
+ if (dir == object->dentry->d_parent) {
+ ret = cachefiles_bury_object(cache, dir,
+ object->dentry, false);
+ } else {
+ /* it got moved, presumably by cachefilesd culling it,
+ * so it's no longer in the key path and we can ignore
+ * it */
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = 0;
+ }
}
dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
const char *name;
int ret, nlen;
- _enter("{%p},,%s,", parent->dentry, key);
+ _enter("OBJ%x{%p},OBJ%x,%s,",
+ parent->fscache.debug_id, parent->dentry,
+ object->fscache.debug_id, key);
cache = container_of(parent->fscache.cache,
struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
* mutex) */
object->dentry = NULL;
- ret = cachefiles_bury_object(cache, dir, next);
+ ret = cachefiles_bury_object(cache, dir, next, true);
dput(next);
next = NULL;
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
/* actually remove the victim (drops the dir mutex) */
_debug("bury");
- ret = cachefiles_bury_object(cache, dir, victim);
+ ret = cachefiles_bury_object(cache, dir, victim, false);
if (ret < 0)
goto error;
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb223..039b5011d83 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
/*
* check the security details of the on-disk cache
* - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ * error
*/
int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
* which create files */
ret = set_create_files_as(new, root->d_inode);
if (ret < 0) {
+ abort_creds(new);
+ cachefiles_begin_secure(cache, _saved_cred);
_leave(" = %d [cfa]", ret);
return ret;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4b42c2bb603..a9005d862ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req,
int i;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- struct writeback_control *wbc = req->r_wbc;
__s32 rc = -EIO;
u64 bytes = 0;
struct ceph_client *client = ceph_inode_to_client(inode);
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
clear_bdi_congested(&client->backing_dev_info,
BLK_RW_ASYNC);
- if (i >= wrote) {
- dout("inode %p skipping page %p\n", inode, page);
- wbc->pages_skipped++;
- }
ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
alloc_page_vec(client, req);
req->r_callback = writepages_finish;
req->r_inode = inode;
- req->r_wbc = wbc;
}
/* note position of first page in pvec */
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0c168180686..d9400534b27 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
}
/*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
* caller should hold i_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
- /* remove from inode list */
- rb_erase(&cap->ci_node, &ci->i_caps);
- cap->ci = NULL;
- if (ci->i_auth_cap == cap)
- ci->i_auth_cap = NULL;
-
/* remove from session list */
spin_lock(&session->s_cap_lock);
if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
list_del_init(&cap->session_caps);
session->s_nr_caps--;
cap->session = NULL;
+ removed = 1;
}
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
spin_unlock(&session->s_cap_lock);
- if (cap->session == NULL)
+ /* remove from inode list */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
+ if (removed)
ceph_put_cap(cap);
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 261f3e6c0bc..85b4d2ffdeb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
__ceph_get_fmode(ci, cap_fmode);
spin_unlock(&inode->i_lock);
}
+ } else if (cap_fmode >= 0) {
+ pr_warning("mds issued no caps on %llx.%llx\n",
+ ceph_vinop(inode));
+ __ceph_get_fmode(ci, cap_fmode);
}
/* update delegation info? */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47b..24561a557e0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
}
/*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
*
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
*/
static int iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
struct ceph_mds_session *session = NULL;
struct ceph_msg *reply;
struct rb_node *p;
- int err;
+ int err = -ENOMEM;
struct ceph_pagelist *pagelist;
pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
goto fail;
err = iterate_session_caps(session, encode_caps_cb, pagelist);
if (err < 0)
- goto out;
+ goto fail;
/*
* snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply);
- if (session) {
- session->s_state = CEPH_MDS_SESSION_OPEN;
- __wake_requests(mdsc, &session->s_waiting);
- }
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ mutex_unlock(&session->s_mutex);
+
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+
+ ceph_put_mds_session(session);
-out:
up_read(&mdsc->snap_rwsem);
- if (session) {
- mutex_unlock(&session->s_mutex);
- ceph_put_mds_session(session);
- }
mutex_lock(&mdsc->mutex);
return;
fail:
ceph_msg_put(reply);
+ up_read(&mdsc->snap_rwsem);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
fail_nomsg:
ceph_pagelist_release(pagelist);
kfree(pagelist);
fail_nopagelist:
- pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
- goto out;
+ pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+ mutex_lock(&mdsc->mutex);
+ return;
}
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 509f57d9ccb..cd4fadb6491 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
list_move_tail(&m->list_head, &con->out_sent);
}
- m->hdr.seq = cpu_to_le64(++con->out_seq);
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with it's original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+ msg->needs_out_seq = true;
+
/* queue */
mutex_lock(&con->mutex);
BUG_ON(!list_empty(&msg->list_head));
@@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
kref_init(&m->kref);
INIT_LIST_HEAD(&m->list_head);
+ m->hdr.tid = 0;
m->hdr.type = cpu_to_le16(type);
+ m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.version = 0;
m->hdr.front_len = cpu_to_le32(front_len);
m->hdr.middle_len = 0;
m->hdr.data_len = cpu_to_le32(page_len);
m->hdr.data_off = cpu_to_le16(page_off);
- m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.reserved = 0;
m->footer.front_crc = 0;
m->footer.middle_crc = 0;
m->footer.data_crc = 0;
+ m->footer.flags = 0;
m->front_max = front_len;
m->front_is_vmalloc = false;
m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cd..a5caf91cc97 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
struct kref kref;
bool front_is_vmalloc;
bool more_to_follow;
+ bool needs_out_seq;
int front_max;
struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace..3514f71ff85 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
{
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid;
- int o = -1;
+ int acting[CEPH_PG_MAX_SIZE];
+ int o = -1, num = 0;
int err;
dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
pgid = reqhead->layout.ol_pgid;
req->r_pgid = pgid;
- o = ceph_calc_pg_primary(osdc->osdmap, pgid);
+ err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
+ if (err > 0) {
+ o = acting[0];
+ num = err;
+ }
if ((req->r_osd && req->r_osd->o_osd == o &&
- req->r_sent >= req->r_osd->o_incarnation) ||
+ req->r_sent >= req->r_osd->o_incarnation &&
+ req->r_num_pg_osds == num &&
+ memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
(req->r_osd == NULL && o == -1))
return 0; /* no change */
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
req->r_osd ? req->r_osd->o_osd : -1);
+ /* record full pg acting set */
+ memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+ req->r_num_pg_osds = num;
+
if (req->r_osd) {
__cancel_request(req);
list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
__remove_osd_from_lru(req->r_osd);
list_add(&req->r_osd_item, &req->r_osd->o_requests);
}
- err = 1; /* osd changed */
+ err = 1; /* osd or pg changed */
out:
return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
struct ceph_osd_request *req;
u64 tid;
int numops, object_len, flags;
+ s32 result;
tid = le64_to_cpu(msg->hdr.tid);
if (msg->front.iov_len < sizeof(*rhead))
goto bad;
numops = le32_to_cpu(rhead->num_ops);
object_len = le32_to_cpu(rhead->object_len);
+ result = le32_to_cpu(rhead->result);
if (msg->front.iov_len != sizeof(*rhead) + object_len +
numops * sizeof(struct ceph_osd_op))
goto bad;
- dout("handle_reply %p tid %llu\n", msg, tid);
+ dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
/* lookup */
mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply tid %llu flags %d\n", tid, flags);
/* either this is a read, or we got the safe response */
- if ((flags & CEPH_OSD_FLAG_ONDISK) ||
+ if (result < 0 ||
+ (flags & CEPH_OSD_FLAG_ONDISK) ||
((flags & CEPH_OSD_FLAG_WRITE) == 0))
__unregister_request(osdc, req);
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c..ce776989ef6 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
struct list_head r_osd_item;
struct ceph_osd *r_osd;
struct ceph_pg r_pgid;
+ int r_pg_osds[CEPH_PG_MAX_SIZE];
+ int r_num_pg_osds;
struct ceph_connection *r_con_filling_msg;
@@ -66,7 +68,6 @@ struct ceph_osd_request {
struct list_head r_unsafe_item;
struct inode *r_inode; /* for use by callbacks */
- struct writeback_control *r_wbc; /* ditto */
char r_oid[40]; /* object name */
int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82..cfdd8f4388b 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
}
/*
+ * Return acting set for given pgid.
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *acting)
+{
+ int rawosds[CEPH_PG_MAX_SIZE], *osds;
+ int i, o, num = CEPH_PG_MAX_SIZE;
+
+ osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
+ if (!osds)
+ return -1;
+
+ /* primary is first up osd */
+ o = 0;
+ for (i = 0; i < num; i++)
+ if (ceph_osd_is_up(osdmap, osds[i]))
+ acting[o++] = osds[i];
+ return o;
+}
+
+/*
* Return primary osd for given pgid, or -1 if none.
*/
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{
- int rawosds[10], *osds;
- int i, num = ARRAY_SIZE(rawosds);
+ int rawosds[CEPH_PG_MAX_SIZE], *osds;
+ int i, num = CEPH_PG_MAX_SIZE;
osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
/* primary is first up osd */
for (i = 0; i < num; i++)
- if (ceph_osd_is_up(osdmap, osds[i])) {
+ if (ceph_osd_is_up(osdmap, osds[i]))
return osds[i];
- break;
- }
return -1;
}
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f56..970b547e510 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
const char *oid,
struct ceph_file_layout *fl,
struct ceph_osdmap *osdmap);
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *acting);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b5..fd56451a871 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
#define CEPH_PG_LAYOUT_LINEAR 2
#define CEPH_PG_LAYOUT_HYBRID 3
+#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
/*
* placement group.
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f888cf487b7..110857ba926 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
*/
static void ceph_put_super(struct super_block *s)
{
- struct ceph_client *cl = ceph_client(s);
+ struct ceph_client *client = ceph_sb_to_client(s);
dout("put_super\n");
- ceph_mdsc_close_sessions(&cl->mdsc);
+ ceph_mdsc_close_sessions(&client->mdsc);
+
+ /*
+ * ensure we release the bdi before put_anon_super releases
+ * the device name.
+ */
+ if (s->s_bdi == &client->backing_dev_info) {
+ bdi_unregister(&client->backing_dev_info);
+ s->s_bdi = NULL;
+ }
+
return;
}
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
destroy_workqueue(client->pg_inv_wq);
destroy_workqueue(client->trunc_wq);
+ bdi_destroy(&client->backing_dev_info);
+
if (client->msgr)
ceph_messenger_destroy(client->msgr);
mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
int err;
- sb->s_bdi = &client->backing_dev_info;
-
/* set ra_pages based on rsize mount option? */
if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
client->backing_dev_info.ra_pages =
(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
+ if (!err)
+ sb->s_bdi = &client->backing_dev_info;
return err;
}
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
dout("kill_sb %p\n", s);
ceph_mdsc_pre_umount(&client->mdsc);
kill_anon_super(s); /* will call put_super after sb is r/o */
- if (s->s_bdi == &client->backing_dev_info)
- bdi_unregister(&client->backing_dev_info);
- bdi_destroy(&client->backing_dev_info);
ceph_destroy_client(client);
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b6..0c2fd17439c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -502,6 +502,7 @@ struct dfs_info3_param {
#define CIFS_FATTR_DFS_REFERRAL 0x1
#define CIFS_FATTR_DELETE_PENDING 0x2
#define CIFS_FATTR_NEED_REVAL 0x4
+#define CIFS_FATTR_INO_COLLISION 0x8
struct cifs_fattr {
u32 cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec1171621..29b9ea244c8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
+ /*
+ * uh oh -- it's a directory. We can't use it since hardlinked dirs are
+ * verboten. Disable serverino and return it as if it were found, the
+ * caller can discard it, generate a uniqueid and retry the find
+ */
+ if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
+ fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
+ cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
+ }
+
return 1;
}
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
unsigned long hash;
struct inode *inode;
+retry_iget5_locked:
cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
/* hash down to 32-bits on 32-bit arch */
hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
-
- /* we have fattrs in hand, update the inode */
if (inode) {
+ /* was there a problematic inode number collision? */
+ if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
+ iput(inode);
+ fattr->cf_uniqueid = iunique(sb, ROOT_I);
+ fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
+ goto retry_iget5_locked;
+ }
+
cifs_fattr_to_inode(inode, fattr);
if (sb->s_flags & MS_NOATIME)
inode->i_flags |= S_NOATIME | S_NOCMTIME;
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc47..05448730f84 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
- current->stack_start = current->mm->start_stack;
-
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b..e6e94c626c2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
if (retval < 0)
goto out;
- current->stack_start = current->mm->start_stack;
-
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e4..16df7277a92 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (nd->last.name[nd->last.len]) {
if (open_flag & O_CREAT)
goto exit;
- nd->flags |= LOOKUP_DIRECTORY;
+ nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
}
/* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
}
if (open_flag & O_DIRECTORY)
nd.flags |= LOOKUP_DIRECTORY;
+ if (!(open_flag & O_NOFOLLOW))
+ nd.flags |= LOOKUP_FOLLOW;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path holder;
@@ -1837,7 +1839,7 @@ reval:
void *cookie;
error = -ELOOP;
/* S_ISDIR part is a temporary automount kludge */
- if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e..885ab5513ac 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
-#include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
rsslim,
mm ? mm->start_code : 0,
mm ? mm->end_code : 0,
- (permitted && mm) ? task->stack_start : 0,
+ (permitted && mm) ? mm->start_stack : 0,
esp,
eip,
/* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd..47f5b145f56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
} else if (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack) {
name = "[stack]";
- } else {
- unsigned long stack_start;
- struct proc_maps_private *pmp;
-
- pmp = m->private;
- stack_start = pmp->task->stack_start;
-
- if (vma->vm_start <= stack_start &&
- vma->vm_end >= stack_start) {
- pad_len_spaces(m, len);
- seq_printf(m,
- "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
- vma->vm_end - stack_start
-#else
- stack_start - vma->vm_start
-#endif
- );
- }
}
} else {
name = "[vdso]";