diff options
Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- | fs/dcache.c | 208 |
1 files changed, 137 insertions, 71 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index b80531c9177..85c9e2bff8e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -153,16 +153,12 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, * In contrast, 'ct' and 'tcount' can be from a pathname, and do * need the careful unaligned handling. */ -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { unsigned long a,b,mask; - if (unlikely(scount != tcount)) - return 1; - for (;;) { - a = load_unaligned_zeropad(cs); + a = *(unsigned long *)cs; b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; @@ -180,12 +176,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, #else -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { - if (scount != tcount) - return 1; - do { if (*cs != *ct) return 1; @@ -198,6 +190,30 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, #endif +static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) +{ + const unsigned char *cs; + /* + * Be careful about RCU walk racing with rename: + * use ACCESS_ONCE to fetch the name pointer. + * + * NOTE! Even if a rename will mean that the length + * was not loaded atomically, we don't care. The + * RCU walk will check the sequence count eventually, + * and catch it. And we won't overrun the buffer, + * because we're reading the name pointer atomically, + * and a dentry name is guaranteed to be properly + * terminated with a NUL byte. + * + * End result: even if 'len' is wrong, we'll exit + * early because the data cannot match (there can + * be no NUL in the ct/tcount data) + */ + cs = ACCESS_ONCE(dentry->d_name.name); + smp_read_barrier_depends(); + return dentry_string_cmp(cs, ct, tcount); +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); @@ -667,8 +683,6 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question - * @want_discon: flag, used by d_splice_alias, to request - * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -677,10 +691,9 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one unless @want_discon is set, - * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. + * any other hashed alias over that. */ -static struct dentry *__d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias, *discon_alias; @@ -692,7 +705,7 @@ again: if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else if (!want_discon) { + } else { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -723,7 +736,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode, 0); + de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; @@ -1258,6 +1271,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) if (!dentry) return NULL; + /* + * We guarantee that the inline name is always NUL-terminated. + * This way the memcpy() done by the name switching in rename + * will still always have a NUL at the end, even if we might + * be overwriting an internal NUL character + */ + dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (name->len > DNAME_INLINE_LEN-1) { dname = kmalloc(name->len + 1, GFP_KERNEL); if (!dname) { @@ -1267,13 +1287,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } else { dname = dentry->d_iname; } - dentry->d_name.name = dname; dentry->d_name.len = name->len; dentry->d_name.hash = name->hash; memcpy(dname, name->name, name->len); dname[name->len] = 0; + /* Make sure we always see the terminating NUL character */ + smp_wmb(); + dentry->d_name.name = dname; + dentry->d_count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); @@ -1439,18 +1462,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, } list_for_each_entry(alias, &inode->i_dentry, d_alias) { - struct qstr *qstr = &alias->d_name; - /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or * parent changes, because the parent inode i_mutex is held. */ - if (qstr->hash != hash) + if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; - if (dentry_cmp(qstr->name, qstr->len, name, len)) + if (alias->d_name.len != len) + continue; + if (dentry_cmp(alias, name, len)) continue; __dget(alias); return alias; @@ -1489,7 +1512,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1 }; + static const struct qstr name = QSTR_INIT("/", 1); res = __d_alloc(root_inode->i_sb, &name); if (res) @@ -1624,9 +1647,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_alias(inode, 1); + new = __d_find_any_alias(inode); if (new) { - BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -1727,6 +1749,48 @@ err_out: } EXPORT_SYMBOL(d_add_ci); +/* + * Do the slow-case of the dentry name compare. + * + * Unlike the dentry_cmp() function, we need to atomically + * load the name, length and inode information, so that the + * filesystem can rely on them, and can use the 'name' and + * 'len' information without worrying about walking off the + * end of memory etc. + * + * Thus the read_seqcount_retry() and the "duplicate" info + * in arguments (the low-level filesystem should not look + * at the dentry inode or name contents directly, since + * rename can change them while we're in RCU mode). + */ +enum slow_d_compare { + D_COMP_OK, + D_COMP_NOMATCH, + D_COMP_SEQRETRY, +}; + +static noinline enum slow_d_compare slow_dentry_cmp( + const struct dentry *parent, + struct inode *inode, + struct dentry *dentry, + unsigned int seq, + const struct qstr *name) +{ + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + struct inode *i = dentry->d_inode; + + if (read_seqcount_retry(&dentry->d_seq, seq)) { + cpu_relax(); + return D_COMP_SEQRETRY; + } + if (parent->d_op->d_compare(parent, inode, + dentry, i, + tlen, tname, name)) + return D_COMP_NOMATCH; + return D_COMP_OK; +} + /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry @@ -1753,15 +1817,17 @@ EXPORT_SYMBOL(d_add_ci); * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. + * + * NOTE! The caller *has* to check the resulting dentry against the sequence + * number we've returned before using any of the resulting dentry state! */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, - unsigned *seqp, struct inode **inode) + unsigned *seqp, struct inode *inode) { - unsigned int len = name->len; - unsigned int hash = name->hash; + u64 hashlen = name->hash_len; const unsigned char *str = name->name; - struct hlist_bl_head *b = d_hash(parent, hash); + struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; @@ -1787,49 +1853,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; - struct inode *i; - const char *tname; - int tlen; - - if (dentry->d_name.hash != hash) - continue; seqretry: - seq = read_seqcount_begin(&dentry->d_seq); + /* + * The dentry sequence count protects us from concurrent + * renames, and thus protects inode, parent and name fields. + * + * The caller must perform a seqcount check in order + * to do anything useful with the returned dentry, + * including using the 'd_inode' pointer. + * + * NOTE! We do a "raw" seqcount_begin here. That means that + * we don't wait for the sequence count to stabilize if it + * is in the middle of a sequence change. If we do the slow + * dentry compare, we will do seqretries until it is stable, + * and if we end up with a successful lookup, we actually + * want to exit RCU lookup anyway. + */ + seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; - tlen = dentry->d_name.len; - tname = dentry->d_name.name; - i = dentry->d_inode; - prefetch(tname); - /* - * This seqcount check is required to ensure name and - * len are loaded atomically, so as not to walk off the - * edge of memory when walking. If we could load this - * atomically some other way, we could drop this check. - */ - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto seqretry; + *seqp = seq; + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { - if (parent->d_op->d_compare(parent, *inode, - dentry, i, - tlen, tname, name)) + if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; - } else { - if (dentry_cmp(tname, tlen, str, len)) + switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { + case D_COMP_OK: + return dentry; + case D_COMP_NOMATCH: continue; + default: + goto seqretry; + } } - /* - * No extra seqcount check is required after the name - * compare. The caller must perform a seqcount check in - * order to do anything useful with the returned dentry - * anyway. - */ - *seqp = seq; - *inode = i; - return dentry; + + if (dentry->d_name.hash_len != hashlen) + continue; + if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) + return dentry; } return NULL; } @@ -1908,8 +1972,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) rcu_read_lock(); hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { - const char *tname; - int tlen; if (dentry->d_name.hash != hash) continue; @@ -1924,15 +1986,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). */ - tlen = dentry->d_name.len; - tname = dentry->d_name.name; if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) goto next; } else { - if (dentry_cmp(tname, tlen, str, len)) + if (dentry->d_name.len != len) + goto next; + if (dentry_cmp(dentry, str, len)) goto next; } @@ -2414,7 +2478,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode, 0); + alias = __d_find_alias(inode); if (alias) { actual = alias; write_seqlock(&rename_lock); @@ -2507,7 +2571,7 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2538,7 +2602,7 @@ static int prepend_path(const struct path *path, error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: @@ -3025,6 +3089,7 @@ static void __init dcache_init_early(void) HASH_EARLY, &d_hash_shift, &d_hash_mask, + 0, 0); for (loop = 0; loop < (1U << d_hash_shift); loop++) @@ -3055,6 +3120,7 @@ static void __init dcache_init(void) 0, &d_hash_shift, &d_hash_mask, + 0, 0); for (loop = 0; loop < (1U << d_hash_shift); loop++) |