diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 08:50:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 08:50:57 -0700 |
commit | 31ed8e6f93a27304c9e157dab0267772cd94eaad (patch) | |
tree | 2fd62bea73efa7e2920b0c3e1d81c425eb7bf71c /fs | |
parent | 7e5cb5e151c5474b4a468f437f5038ba9f67ef4d (diff) | |
parent | 26fe575028703948880fce4355a210c76bb0536e (diff) |
Merge branch 'dentry-cleanups' (dcache access cleanups and optimizations)
This branch simplifies and clarifies the dcache lookup, and allows us to
do certain nice optimizations when comparing dentries. It also cleans
up the interface to __d_lookup_rcu(), especially around passing the
inode information around.
* dentry-cleanups:
vfs: make it possible to access the dentry hash/len as one 64-bit entry
vfs: move dentry name length comparison from dentry_cmp() into callers
vfs: do the careful dentry name access for all dentry_cmp cases
vfs: remove unnecessary d_unhashed() check from __d_lookup_rcu
vfs: clean up __d_lookup_rcu() and dentry_cmp() interfaces
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dcache.c | 173 | ||||
-rw-r--r-- | fs/ext2/namei.c | 2 | ||||
-rw-r--r-- | fs/ext3/namei.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 5 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 2 | ||||
-rw-r--r-- | fs/libfs.c | 4 | ||||
-rw-r--r-- | fs/namei.c | 19 | ||||
-rw-r--r-- | fs/nfs/dir.c | 5 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 3 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 3 | ||||
-rw-r--r-- | fs/nfs/proc.c | 3 | ||||
-rw-r--r-- | fs/nilfs2/namei.c | 2 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 2 | ||||
-rw-r--r-- | fs/ubifs/xattr.c | 6 | ||||
-rw-r--r-- | fs/udf/namei.c | 2 | ||||
-rw-r--r-- | fs/ufs/super.c | 5 |
16 files changed, 146 insertions, 92 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index b80531c9177..92099f61bc6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -153,16 +153,12 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, * In contrast, 'ct' and 'tcount' can be from a pathname, and do * need the careful unaligned handling. */ -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { unsigned long a,b,mask; - if (unlikely(scount != tcount)) - return 1; - for (;;) { - a = load_unaligned_zeropad(cs); + a = *(unsigned long *)cs; b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; @@ -180,12 +176,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, #else -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { - if (scount != tcount) - return 1; - do { if (*cs != *ct) return 1; @@ -198,6 +190,27 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, #endif +static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) +{ + /* + * Be careful about RCU walk racing with rename: + * use ACCESS_ONCE to fetch the name pointer. + * + * NOTE! Even if a rename will mean that the length + * was not loaded atomically, we don't care. The + * RCU walk will check the sequence count eventually, + * and catch it. And we won't overrun the buffer, + * because we're reading the name pointer atomically, + * and a dentry name is guaranteed to be properly + * terminated with a NUL byte. + * + * End result: even if 'len' is wrong, we'll exit + * early because the data cannot match (there can + * be no NUL in the ct/tcount data) + */ + return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount); +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); @@ -1439,18 +1452,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, } list_for_each_entry(alias, &inode->i_dentry, d_alias) { - struct qstr *qstr = &alias->d_name; - /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or * parent changes, because the parent inode i_mutex is held. */ - if (qstr->hash != hash) + if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; - if (dentry_cmp(qstr->name, qstr->len, name, len)) + if (alias->d_name.len != len) + continue; + if (dentry_cmp(alias, name, len)) continue; __dget(alias); return alias; @@ -1489,7 +1502,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1 }; + static const struct qstr name = QSTR_INIT("/", 1); res = __d_alloc(root_inode->i_sb, &name); if (res) @@ -1727,6 +1740,48 @@ err_out: } EXPORT_SYMBOL(d_add_ci); +/* + * Do the slow-case of the dentry name compare. + * + * Unlike the dentry_cmp() function, we need to atomically + * load the name, length and inode information, so that the + * filesystem can rely on them, and can use the 'name' and + * 'len' information without worrying about walking off the + * end of memory etc. + * + * Thus the read_seqcount_retry() and the "duplicate" info + * in arguments (the low-level filesystem should not look + * at the dentry inode or name contents directly, since + * rename can change them while we're in RCU mode). + */ +enum slow_d_compare { + D_COMP_OK, + D_COMP_NOMATCH, + D_COMP_SEQRETRY, +}; + +static noinline enum slow_d_compare slow_dentry_cmp( + const struct dentry *parent, + struct inode *inode, + struct dentry *dentry, + unsigned int seq, + const struct qstr *name) +{ + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + struct inode *i = dentry->d_inode; + + if (read_seqcount_retry(&dentry->d_seq, seq)) { + cpu_relax(); + return D_COMP_SEQRETRY; + } + if (parent->d_op->d_compare(parent, inode, + dentry, i, + tlen, tname, name)) + return D_COMP_NOMATCH; + return D_COMP_OK; +} + /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry @@ -1753,15 +1808,17 @@ EXPORT_SYMBOL(d_add_ci); * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. + * + * NOTE! The caller *has* to check the resulting dentry against the sequence + * number we've returned before using any of the resulting dentry state! */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, - unsigned *seqp, struct inode **inode) + unsigned *seqp, struct inode *inode) { - unsigned int len = name->len; - unsigned int hash = name->hash; + u64 hashlen = name->hash_len; const unsigned char *str = name->name; - struct hlist_bl_head *b = d_hash(parent, hash); + struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; @@ -1787,49 +1844,45 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; - struct inode *i; - const char *tname; - int tlen; - - if (dentry->d_name.hash != hash) - continue; seqretry: - seq = read_seqcount_begin(&dentry->d_seq); - if (dentry->d_parent != parent) - continue; - if (d_unhashed(dentry)) - continue; - tlen = dentry->d_name.len; - tname = dentry->d_name.name; - i = dentry->d_inode; - prefetch(tname); /* - * This seqcount check is required to ensure name and - * len are loaded atomically, so as not to walk off the - * edge of memory when walking. If we could load this - * atomically some other way, we could drop this check. + * The dentry sequence count protects us from concurrent + * renames, and thus protects inode, parent and name fields. + * + * The caller must perform a seqcount check in order + * to do anything useful with the returned dentry, + * including using the 'd_inode' pointer. + * + * NOTE! We do a "raw" seqcount_begin here. That means that + * we don't wait for the sequence count to stabilize if it + * is in the middle of a sequence change. If we do the slow + * dentry compare, we will do seqretries until it is stable, + * and if we end up with a successful lookup, we actually + * want to exit RCU lookup anyway. */ - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto seqretry; + seq = raw_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + *seqp = seq; + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { - if (parent->d_op->d_compare(parent, *inode, - dentry, i, - tlen, tname, name)) + if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; - } else { - if (dentry_cmp(tname, tlen, str, len)) + switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { + case D_COMP_OK: + return dentry; + case D_COMP_NOMATCH: continue; + default: + goto seqretry; + } } - /* - * No extra seqcount check is required after the name - * compare. The caller must perform a seqcount check in - * order to do anything useful with the returned dentry - * anyway. - */ - *seqp = seq; - *inode = i; - return dentry; + + if (dentry->d_name.hash_len != hashlen) + continue; + if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) + return dentry; } return NULL; } @@ -1908,8 +1961,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) rcu_read_lock(); hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { - const char *tname; - int tlen; if (dentry->d_name.hash != hash) continue; @@ -1924,15 +1975,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). */ - tlen = dentry->d_name.len; - tname = dentry->d_name.name; if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) goto next; } else { - if (dentry_cmp(tname, tlen, str, len)) + if (dentry->d_name.len != len) + goto next; + if (dentry_cmp(dentry, str, len)) goto next; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index dffb8653628..f663a67d7bf 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -79,7 +79,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str struct dentry *ext2_get_parent(struct dentry *child) { - struct qstr dotdot = {.name = "..", .len = 2}; + struct qstr dotdot = QSTR_INIT("..", 2); unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); if (!ino) return ERR_PTR(-ENOENT); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index d7940b24cf6..eeb63dfc5d2 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1045,7 +1045,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str struct dentry *ext3_get_parent(struct dentry *child) { unsigned long ino; - struct qstr dotdot = {.name = "..", .len = 2}; + struct qstr dotdot = QSTR_INIT("..", 2); struct ext3_dir_entry_2 * de; struct buffer_head *bh; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 349d7b3671c..e2a3f4b0ff7 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1052,10 +1052,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru struct dentry *ext4_get_parent(struct dentry *child) { __u32 ino; - static const struct qstr dotdot = { - .name = "..", - .len = 2, - }; + static const struct qstr dotdot = QSTR_INIT("..", 2); struct ext4_dir_entry_2 * de; struct buffer_head *bh; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index a836056343f..8aaeb07a07b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -821,7 +821,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; - struct qstr name = { .name = "", .len = 0, .hash = 0 }; + struct qstr name = { .name = "" }; error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) diff --git a/fs/libfs.c b/fs/libfs.c index 18d08f5db53..f86ec27a423 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -68,7 +68,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = {.len = 1, .name = "."}; + static struct qstr cursor_name = QSTR_INIT(".", 1); file->private_data = d_alloc(file->f_path.dentry, &cursor_name); @@ -225,7 +225,7 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); struct dentry *dentry; struct inode *root; - struct qstr d_name = {.name = name, .len = strlen(name)}; + struct qstr d_name = QSTR_INIT(name, strlen(name)); if (IS_ERR(s)) return ERR_CAST(s); diff --git a/fs/namei.c b/fs/namei.c index 709fb2c17b5..f9e883c1b85 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1144,12 +1144,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, */ if (nd->flags & LOOKUP_RCU) { unsigned seq; - *inode = nd->inode; - dentry = __d_lookup_rcu(parent, name, &seq, inode); + dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); if (!dentry) goto unlazy; - /* Memory barrier in read_seqcount_begin of child is enough */ + /* + * This sequence count validates that the inode matches + * the dentry name information from lookup. + */ + *inode = dentry->d_inode; + if (read_seqcount_retry(&dentry->d_seq, seq)) + return -ECHILD; + + /* + * This sequence count validates that the parent had no + * changes while we did the lookup of the dentry above. + * + * The memory barrier in read_seqcount_begin of child is + * enough, we can use __read_seqcount_retry here. + */ if (__read_seqcount_retry(&parent->d_seq, nd->seq)) return -ECHILD; nd->seq = seq; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8789210c690..eedd24d0ad2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -477,10 +477,7 @@ different: static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { - struct qstr filename = { - .len = entry->len, - .name = entry->name, - }; + struct qstr filename = QSTR_INIT(entry->name, entry->len); struct dentry *dentry; struct dentry *alias; struct inode *dir = parent->d_inode; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5242eae6711..75c68299358 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -398,8 +398,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name.len = name->len, - .name.name = name->name, + .name = *name, }; struct nfs_removeres res; struct rpc_message msg = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 99650aaf893..ab985f6f0da 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2782,8 +2782,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) struct nfs_server *server = NFS_SERVER(dir); struct nfs_removeargs args = { .fh = NFS_FH(dir), - .name.len = name->len, - .name.name = name->name, + .name = *name, .bitmask = server->attr_bitmask, }; struct nfs_removeres res = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b63b6f4d14f..d6408b6437d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -335,8 +335,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name.len = name->len, - .name.name = name->name, + .name = *name, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index fce2bbee66d..0bb2c2010b9 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -441,7 +441,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child) { unsigned long ino; struct inode *inode; - struct qstr dotdot = {.name = "..", .len = 2}; + struct qstr dotdot = QSTR_INIT("..", 2); struct nilfs_root *root; ino = nilfs_inode_by_name(child->d_inode, &dotdot); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 16ad84d8402..abd51331345 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2361,7 +2361,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, * by passing 'ubifs_tnc_remove_nm()' the same key but * an unmatchable name. */ - struct qstr noname = { .len = 0, .name = "" }; + struct qstr noname = { .name = "" }; err = dbg_check_tnc(c, 0); mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 85b27226875..7a8bafa19c9 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -298,7 +298,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_dent_node *xent; union ubifs_key key; int err, type; @@ -361,7 +361,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_inode *ui; struct ubifs_dent_node *xent; union ubifs_key key; @@ -524,7 +524,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_dent_node *xent; union ubifs_key key; int err; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 38de8f234b9..a165c66e3ee 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1193,7 +1193,7 @@ static struct dentry *udf_get_parent(struct dentry *child) { struct kernel_lb_addr tloc; struct inode *inode = NULL; - struct qstr dotdot = {.name = "..", .len = 2}; + struct qstr dotdot = QSTR_INIT("..", 2); struct fileIdentDesc cfi; struct udf_fileident_bh fibh; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ac8e279eccc..302f340d007 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -146,10 +146,7 @@ static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid, static struct dentry *ufs_get_parent(struct dentry *child) { - struct qstr dot_dot = { - .name = "..", - .len = 2, - }; + struct qstr dot_dot = QSTR_INIT("..", 2); ino_t ino; ino = ufs_inode_by_name(child->d_inode, &dot_dot); |