From 630d9c47274aa89bfa77fe6556d7818bdcb12992 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 16 Nov 2011 23:57:37 -0500 Subject: fs: reduce the use of module.h wherever possible For files only using THIS_MODULE and/or EXPORT_SYMBOL, map them onto including export.h -- or if the file isn't even using those, then just delete the include. Fix up any implicit include dependencies that were being masked by module.h along the way. Signed-off-by: Paul Gortmaker --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index fe19ac13f75..303ebd98bc8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From bfcfaa77bdf0f775263e906015982a608df01c76 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Mar 2012 11:16:17 -0800 Subject: vfs: use 'unsigned long' accesses for dcache name comparison and hashing Ok, this is hacky, and only works on little-endian machines with goo unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC disabled, since it can access up to 7 bytes after the pathname. But it runs like a bat out of hell. Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + fs/Kconfig | 4 ++ fs/dcache.c | 23 +++++++++++ fs/namei.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+) (limited to 'fs/dcache.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bed94e189f..09675d3e0ac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -82,6 +82,7 @@ config X86 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP + select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/fs/Kconfig b/fs/Kconfig index d621f02a3f9..aa195265362 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,10 @@ menu "File systems" +# Use unaligned word dcache accesses +config DCACHE_WORD_ACCESS + bool + if BLOCK source "fs/ext2/Kconfig" diff --git a/fs/dcache.c b/fs/dcache.c index bcbdb33fcc2..ffd47a16d87 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, static inline int dentry_cmp(const unsigned char *cs, size_t scount, const unsigned char *ct, size_t tcount) { +#ifdef CONFIG_DCACHE_WORD_ACCESS + unsigned long a,b,mask; + + if (unlikely(scount != tcount)) + return 1; + + for (;;) { + a = *(unsigned long *)cs; + b = *(unsigned long *)ct; + if (tcount < sizeof(unsigned long)) + break; + if (unlikely(a != b)) + return 1; + cs += sizeof(unsigned long); + ct += sizeof(unsigned long); + tcount -= sizeof(unsigned long); + if (!tcount) + return 0; + } + mask = ~(~0ul << tcount*8); + return unlikely(!!((a ^ b) & mask)); +#else if (scount != tcount) return 1; @@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, tcount--; } while (tcount); return 0; +#endif } static void __d_free(struct rcu_head *head) diff --git a/fs/namei.c b/fs/namei.c index e2ba62820a0..378497a744b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode) return 1; } +/* + * We can do the critical dentry name comparison and hashing + * operations one word at a time, but we are limited to: + * + * - Architectures with fast unaligned word accesses. We could + * do a "get_unaligned()" if this helps and is sufficiently + * fast. + * + * - Little-endian machines (so that we can generate the mask + * of low bytes efficiently). Again, we *could* do a byte + * swapping load on big-endian architectures if that is not + * expensive enough to make the optimization worthless. + * + * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we + * do not trap on the (extremely unlikely) case of a page + * crossing operation. + * + * - Furthermore, we need an efficient 64-bit compile for the + * 64-bit case in order to generate the "number of bytes in + * the final mask". Again, that could be replaced with a + * efficient population count instruction or similar. + */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608 >> 56; +} + +static inline unsigned int fold_hash(unsigned long hash) +{ + hash += hash >> (8*sizeof(int)); + return hash; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#define fold_hash(x) (x) + +#endif + +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long a, mask; + unsigned long hash = 0; + + for (;;) { + a = *(unsigned long *)name; + hash *= 9; + if (len < sizeof(unsigned long)) + break; + hash += a; + name += sizeof(unsigned long); + len -= sizeof(unsigned long); + if (!len) + goto done; + } + mask = ~(~0ul << len*8); + hash += mask & a; +done: + return fold_hash(hash); +} +EXPORT_SYMBOL(full_name_hash); + +#define ONEBYTES 0x0101010101010101ul +#define SLASHBYTES 0x2f2f2f2f2f2f2f2ful +#define HIGHBITS 0x8080808080808080ul + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - ONEBYTES) & ~a) & HIGHBITS; +} + +/* + * Calculate the length and hash of the path component, and + * return the length of the component; + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long a, mask, hash, len; + + hash = a = 0; + len = -sizeof(unsigned long); + do { + hash = (hash + a) * 9; + len += sizeof(unsigned long); + a = *(unsigned long *)(name+len); + /* Do we have any NUL or '/' bytes in this word? */ + mask = has_zero(a) | has_zero(a ^ SLASHBYTES); + } while (!mask); + + /* The mask *below* the first high bit set */ + mask = (mask - 1) & ~mask; + mask >>= 7; + hash += a & mask; + *hashp = fold_hash(hash); + + return len + count_masked_bytes(mask); +} + +#else + unsigned int full_name_hash(const unsigned char *name, unsigned int len) { unsigned long hash = init_name_hash(); @@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) return len; } +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into -- cgit v1.2.3-70-g09d2 From 6d7d1a0dc735ea8412769edae7154885021107a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Mar 2012 16:19:53 -0700 Subject: vfs: get rid of batshit-insane pointless dentry hash calculations For some odd historical reason, the final mixing round for the dentry cache hash table lookup had an insane "xor with big constant" logic. In two places. The big constant that is being xor'ed is GOLDEN_RATIO_PRIME, which is a fairly random-looking number that is designed to be *multiplied* with so that the bits get spread out over a whole long-word. But xor'ing with it is insane. It doesn't really even change the hash - it really only shifts the hash around in the hash table. To make matters worse, the insane big constant is different on 32-bit and 64-bit builds, even though the name hash bits we use are always 32-bit (and the bits from the pointer we mix in effectively are too). It's all total voodoo programming, in other words. Now, some testing and analysis of the hash chains shows that the rest of the hash function seems to be fairly good. It does pick the right bits of the parent dentry pointer, for example, and while it's generally a bad idea to use an xor to mix down the upper bits (because if there is a repeating pattern, the xor can cause "destructive interference"), it seems to not have been a disaster. For example, replacing the hash with the normal "hash_long()" code (that uses the GOLDEN_RATIO_PRIME constant correctly, btw) actually just makes the hash worse. The hand-picked hash knew which bits of the pointer had the highest entropy, and hash_long() ends up mixing bits less optimally at least in some trivial tests. So the hash function overall seems fine, it just has that really odd "shift result around by a constant xor". So get rid of the silly xor, and replace the down-mixing of the bits with an add instead of an xor that tends to not have the same kind of destructive interference issues. Some stats on the resulting hash chains shows that they look statistically identical before and after, but the code is simpler and no longer makes you go "WTF?". Also, the incoming hash really is just "unsigned int", not a long, and there's no real point to worry about the high 26 bits of the dentry pointer for the 64-bit case, because they are all going to be identical anyway. So also change the hashing to be done in the more natural 'unsigned int' that is the real size of the actual hashed data anyway. Signed-off-by: Linus Torvalds --- fs/dcache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index bcbdb33fcc2..5f00a6f63c9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -105,10 +105,10 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; static inline struct hlist_bl_head *d_hash(const struct dentry *parent, - unsigned long hash) + unsigned int hash) { - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + hash += (unsigned long) parent / L1_CACHE_BYTES; + hash = hash + (hash >> D_HASHBITS); return dentry_hashtable + (hash & D_HASHMASK); } -- cgit v1.2.3-70-g09d2 From 32991ab305ace7017c62f8eecbe5eb36dc32e13b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Feb 2012 22:15:47 -0500 Subject: vfs: d_alloc_root() gone all callers converted to d_make_root() by now Signed-off-by: Al Viro --- Documentation/filesystems/porting | 6 ++++++ fs/dcache.c | 24 ------------------------ include/linux/dcache.h | 1 - 3 files changed, 6 insertions(+), 25 deletions(-) (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index b4a3d765ff9..74acd961881 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -429,3 +429,9 @@ filemap_write_and_wait_range() so that all dirty pages are synced out properly. You must also keep in mind that ->fsync() is not called with i_mutex held anymore, so if you require i_mutex locking you must make sure to take it and release it yourself. + +-- +[mandatory] + d_alloc_root() is gone, along with a lot of bugs caused by code +misusing it. Replacement: d_make_root(inode). The difference is, +d_make_root() drops the reference to inode if dentry allocation fails. diff --git a/fs/dcache.c b/fs/dcache.c index bcbdb33fcc2..a78e145a435 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1443,30 +1443,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); -/** - * d_alloc_root - allocate root dentry - * @root_inode: inode to allocate the root for - * - * Allocate a root ("/") dentry for the inode given. The inode is - * instantiated and returned. %NULL is returned if there is insufficient - * memory or the inode passed is %NULL. - */ - -struct dentry * d_alloc_root(struct inode * root_inode) -{ - struct dentry *res = NULL; - - if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1 }; - - res = __d_alloc(root_inode->i_sb, &name); - if (res) - d_instantiate(res, root_inode); - } - return res; -} -EXPORT_SYMBOL(d_alloc_root); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ff5f5256d17..7e11f141820 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -222,7 +222,6 @@ extern void shrink_dcache_for_umount(struct super_block *); extern int d_invalidate(struct dentry *); /* only used at mount-time */ -extern struct dentry * d_alloc_root(struct inode *); extern struct dentry * d_make_root(struct inode *); /* - the ramfs-type tree */ -- cgit v1.2.3-70-g09d2 From 1f1e6e523e43e312c0e0d38c09828d53e9f709fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Mar 2012 21:23:05 -0700 Subject: fs: fix kernel-doc warnings in dcache.c Fix kernel-doc warnings in fs/dcache.c: Warning(fs/dcache.c:1743): No description found for parameter 'seqp' Warning(fs/dcache.c:1743): Excess function parameter 'seq' description in '__d_lookup_rcu' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index e441941c834..2b55bd0c106 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1713,7 +1713,7 @@ EXPORT_SYMBOL(d_add_ci); * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find - * @seq: returns d_seq value at the point where the dentry was found + * @seqp: returns d_seq value at the point where the dentry was found * @inode: returns dentry->d_inode when the inode was found valid. * Returns: dentry, or NULL * -- cgit v1.2.3-70-g09d2