From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:34:06 -0400 Subject: ->encode_fh() API change pass inode + parent's inode or NULL instead of dentry + bool saying whether we want the parent or not. NOTE: that needs ceph fix folded in. Signed-off-by: Al Viro --- include/linux/exportfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5322d..12291a7ee27 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); + int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, -- cgit v1.2.3-70-g09d2 From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 May 2012 13:29:45 +0930 Subject: lglock: remove online variants of lock Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. Signed-off-by: Rusty Russell Acked-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 58 ++------------------------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402ccec5..0fdd821e77b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ -- cgit v1.2.3-70-g09d2 From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:24 +0930 Subject: brlocks/lglocks: turn into functions lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro --- fs/file_table.c | 1 - fs/internal.h | 2 +- include/linux/lglock.h | 125 ++++++++++--------------------------------------- kernel/Makefile | 2 +- kernel/lglock.c | 89 +++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 102 deletions(-) create mode 100644 kernel/lglock.c (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index 70f2a0fd6ae..f5c67c59ec1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h index 9962c59ba28..8040af489c7 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0fdd821e77b..f01e5f6d1f0 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include #include #include +#include /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b..296132c19a5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 index 00000000000..6535a667a5a --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include +#include +#include +#include + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); -- cgit v1.2.3-70-g09d2 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- arch/alpha/include/asm/posix_types.h | 3 --- arch/arm/include/asm/posix_types.h | 3 --- arch/avr32/include/asm/posix_types.h | 3 --- arch/blackfin/include/asm/posix_types.h | 3 --- arch/cris/include/asm/posix_types.h | 3 --- arch/frv/include/asm/posix_types.h | 3 --- arch/h8300/include/asm/posix_types.h | 3 --- arch/ia64/include/asm/posix_types.h | 3 --- arch/m32r/include/asm/posix_types.h | 3 --- arch/m68k/include/asm/posix_types.h | 3 --- arch/mips/include/asm/posix_types.h | 5 ----- arch/mn10300/include/asm/posix_types.h | 3 --- arch/parisc/include/asm/posix_types.h | 3 --- arch/powerpc/include/asm/posix_types.h | 3 --- arch/s390/include/asm/posix_types.h | 3 --- arch/sh/include/asm/posix_types_32.h | 2 -- arch/sh/include/asm/posix_types_64.h | 2 -- arch/sparc/include/asm/posix_types.h | 5 ----- arch/tile/include/asm/compat.h | 1 - arch/x86/include/asm/posix_types_32.h | 3 --- include/asm-generic/posix_types.h | 4 ---- include/linux/types.h | 2 +- 22 files changed, 1 insertion(+), 65 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h index 24779fc9599..5a8a48320ef 100644 --- a/arch/alpha/include/asm/posix_types.h +++ b/arch/alpha/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned int __kernel_ino_t; #define __kernel_ino_t __kernel_ino_t -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h index efdf99045d8..d2de9cbbcd9 100644 --- a/arch/arm/include/asm/posix_types.h +++ b/arch/arm/include/asm/posix_types.h @@ -22,9 +22,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h index 74667bfc88c..9ba9e749b3f 100644 --- a/arch/avr32/include/asm/posix_types.h +++ b/arch/avr32/include/asm/posix_types.h @@ -17,9 +17,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h index 41bc1875c4d..1bd3436db6a 100644 --- a/arch/blackfin/include/asm/posix_types.h +++ b/arch/blackfin/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned int __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h index 234891c74e2..ce4e5179315 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/asm/posix_types.h @@ -15,9 +15,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h index 3f34cb45fbb..fe512af74a5 100644 --- a/arch/frv/include/asm/posix_types.h +++ b/arch/frv/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h index bc4c34efb1a..91e62ba4c7b 100644 --- a/arch/h8300/include/asm/posix_types.h +++ b/arch/h8300/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h index 7323ab9467e..99ee1d6510c 100644 --- a/arch/ia64/include/asm/posix_types.h +++ b/arch/ia64/include/asm/posix_types.h @@ -1,9 +1,6 @@ #ifndef _ASM_IA64_POSIX_TYPES_H #define _ASM_IA64_POSIX_TYPES_H -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h index 0195850e1f8..236de26a409 100644 --- a/arch/m32r/include/asm/posix_types.h +++ b/arch/m32r/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h index 6373093be72..cf4dbf70fdc 100644 --- a/arch/m68k/include/asm/posix_types.h +++ b/arch/m68k/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h index e0308dcca13..fa03ec3fbf8 100644 --- a/arch/mips/include/asm/posix_types.h +++ b/arch/mips/include/asm/posix_types.h @@ -17,11 +17,6 @@ * assume GCC is being used. */ -#if (_MIPS_SZLONG == 64) -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t -#endif - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h index ab506181ec3..d31eeea480c 100644 --- a/arch/mn10300/include/asm/posix_types.h +++ b/arch/mn10300/include/asm/posix_types.h @@ -20,9 +20,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h index 5212b0357da..b9344256f76 100644 --- a/arch/parisc/include/asm/posix_types.h +++ b/arch/parisc/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h index f1393252bbd..2958c5b97b2 100644 --- a/arch/powerpc/include/asm/posix_types.h +++ b/arch/powerpc/include/asm/posix_types.h @@ -16,9 +16,6 @@ typedef int __kernel_ssize_t; typedef long __kernel_ptrdiff_t; #define __kernel_size_t __kernel_size_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h index edf8527ff08..7be104c0f19 100644 --- a/arch/s390/include/asm/posix_types.h +++ b/arch/s390/include/asm/posix_types.h @@ -24,7 +24,6 @@ typedef unsigned short __kernel_old_dev_t; typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; @@ -35,7 +34,6 @@ typedef int __kernel_ptrdiff_t; typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; -typedef unsigned int __kernel_nlink_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; typedef unsigned int __kernel_gid_t; @@ -47,7 +45,6 @@ typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t -#define __kernel_nlink_t __kernel_nlink_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t #define __kernel_uid_t __kernel_uid_t #define __kernel_gid_t __kernel_gid_t diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h index abda58467ec..ba0bdc423b0 100644 --- a/arch/sh/include/asm/posix_types_32.h +++ b/arch/sh/include/asm/posix_types_32.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h index fcda07b4a61..244f7e950e1 100644 --- a/arch/sh/include/asm/posix_types_64.h +++ b/arch/sh/include/asm/posix_types_64.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h index 3070f25ae90..156220ed99e 100644 --- a/arch/sparc/include/asm/posix_types.h +++ b/arch/sparc/include/asm/posix_types.h @@ -9,8 +9,6 @@ #if defined(__sparc__) && defined(__arch64__) /* sparc 64 bit */ -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; @@ -38,9 +36,6 @@ typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 69adc08d36a..6e74450ff0a 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index 99f262e04b9..8e525059e7d 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd4dde..fe74fccf18d 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef __kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db6023..9c1bd539ea7 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __kernel_nlink_t nlink_t; +typedef __u32 nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; -- cgit v1.2.3-70-g09d2 From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 May 2012 11:02:24 -0700 Subject: fsnotify: handle subfiles' perm events Recently I'm working on fanotify and found the following strange behaviors. I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY all events notified. fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is blocked as expected. But, fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo aaa It's not blocked anymore. This is confusing behavior. Also reading commit "fsnotify: call fsnotify_parent in perm events", it seems like fsnotify should handle subfiles' perm events as well as the other notify events. With this patch, regardless of FAN_ALL_EVENTS set or not: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is now blocked properly. FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD. Due to fsnotify_inode_watches_children() check, if you only specify only these events as fsnotify_mask, you don't get subfiles' perm events notified. This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified even if only these events are specified to fsnotify_mask. Signed-off-by: Naohiro Aota Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a34ef..63d966d5c2e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) -- cgit v1.2.3-70-g09d2 From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:11:37 -0400 Subject: split cap_mmap_addr() out of cap_file_mmap() ... switch callers. Signed-off-by: Al Viro --- include/linux/security.h | 3 ++- security/apparmor/lsm.c | 2 +- security/commoncap.c | 32 +++++++++++++++++++++++--------- security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091ce5f..4ad59c9fa73 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_mmap_addr(unsigned long addr); extern int cap_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only); @@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + return cap_mmap_addr(addr); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 032daab449b..8430d8937af 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -497,7 +497,7 @@ static int apparmor_file_mmap(struct file *file, unsigned long reqprot, int rc = 0; /* do DAC check */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/commoncap.c b/security/commoncap.c index e771cb1b2d7..ebac3618896 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) } /* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused + * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped - * @addr_only: unused * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. Returns 0 if this mapping should be allowed * -EPERM if not. */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_addr(unsigned long addr) { int ret = 0; @@ -986,3 +979,24 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, } return ret; } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below dac_mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + return cap_mmap_addr(addr); +} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fa2341b6833..25c125eaa3d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3104,7 +3104,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d583c054580..a6219771876 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1199,7 +1199,7 @@ static int smack_file_mmap(struct file *file, int rc; /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; -- cgit v1.2.3-70-g09d2 From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:30:51 -0400 Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file() ... i.e. file-dependent and address-dependent checks. Signed-off-by: Al Viro --- fs/exec.c | 4 ---- include/linux/security.h | 36 ++++++++++++++++++++---------------- mm/mmap.c | 12 ++++++++---- mm/mremap.c | 4 ++-- mm/nommu.c | 5 ++++- security/apparmor/lsm.c | 15 ++++----------- security/capability.c | 3 ++- security/commoncap.c | 21 +++------------------ security/security.c | 12 ++++++++---- security/selinux/hooks.c | 15 ++++++++------- security/smack/smack_lsm.c | 15 +++++---------- 11 files changed, 64 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 52c9e2ff6e6..a79786a8d2c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); - err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); - if (err) - goto err; - err = insert_vm_struct(mm, vma); if (err) goto err; diff --git a/include/linux/security.h b/include/linux/security.h index 4ad59c9fa73..f1bae0963dd 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_mmap_addr(unsigned long addr); -extern int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +extern int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @file_mmap : + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). * @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. - * @addr contains virtual address that will be used for the operation. - * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1482,10 +1483,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_mmap) (struct file *file, + int (*mmap_addr) (unsigned long addr); + int (*mmap_file) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only); + unsigned long flags); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); +int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_mmap(struct file *file, unsigned long reqprot, +static inline int security_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, - unsigned long addr, - unsigned long addr_only) + unsigned long flags) +{ + return 0; +} + +static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); } diff --git a/mm/mmap.c b/mm/mmap.c index 83c56624f1f..49283da9a2a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1101,7 +1101,11 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + error = security_mmap_addr(addr); + if (error) + return error; + + error = security_mmap_file(file, reqprot, prot, flags); if (error) return error; @@ -1817,7 +1821,7 @@ int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_file_mmap(NULL, 0, 0, 0, address, 1); + error = security_mmap_addr(address); if (error) return error; @@ -2205,7 +2209,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_file_mmap(NULL, 0, 0, 0, addr, 1); + error = security_mmap_addr(addr); if (error) return error; @@ -2561,7 +2565,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + ret = security_mmap_addr(vma->vm_start); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index 169c53b8774..ebf10892b63 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,7 +371,7 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; @@ -532,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); diff --git a/mm/nommu.c b/mm/nommu.c index de6084e3a04..acfe419785d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1047,7 +1047,10 @@ static int validate_mmap_request(struct file *file, } /* allow the security API to have its say */ - ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); + ret = security_mmap_addr(addr); + if (ret < 0) + return ret; + ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8430d8937af..8ea39aabe94 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot, return common_file_perm(op, file, mask); } -static int apparmor_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int apparmor_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - int rc = 0; - - /* do DAC check */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - return common_mmap(OP_FMMAP, file, prot, flags); } @@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = { .file_permission = apparmor_file_permission, .file_alloc_security = apparmor_file_alloc_security, .file_free_security = apparmor_file_free_security, - .file_mmap = apparmor_file_mmap, + .mmap_file = apparmor_mmap_file, + .mmap_addr = cap_mmap_addr, .file_mprotect = apparmor_file_mprotect, .file_lock = apparmor_file_lock, diff --git a/security/capability.c b/security/capability.c index fca889676c5..61095df8b89 100644 --- a/security/capability.c +++ b/security/capability.c @@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, file_mmap); + set_to_cap_if_null(ops, mmap_addr); + set_to_cap_if_null(ops, mmap_file); set_to_cap_if_null(ops, file_mprotect); set_to_cap_if_null(ops, file_lock); set_to_cap_if_null(ops, file_fcntl); diff --git a/security/commoncap.c b/security/commoncap.c index ebac3618896..6dbae4650ab 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -980,23 +980,8 @@ int cap_mmap_addr(unsigned long addr) return ret; } -/* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused - * @addr: address attempting to be mapped - * @addr_only: unused - * - * If the process is attempting to map memory below dac_mmap_min_addr they need - * CAP_SYS_RAWIO. The other parameters to this function are unused by the - * capability security module. Returns 0 if this mapping should be allowed - * -EPERM if not. - */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - return cap_mmap_addr(addr); + return 0; } diff --git a/security/security.c b/security/security.c index 5497a57fba0..d91c66d3956 100644 --- a/security/security.c +++ b/security/security.c @@ -657,18 +657,22 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { int ret; - ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; return ima_file_mmap(file, prot); } +int security_mmap_addr(unsigned long addr) +{ + return security_ops->mmap_addr(addr); +} + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 25c125eaa3d..372ec6502aa 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3083,9 +3083,7 @@ error: return rc; } -static int selinux_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int selinux_mmap_addr(unsigned long addr) { int rc = 0; u32 sid = current_sid(); @@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; + return cap_mmap_addr(addr); +} +static int selinux_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +{ if (selinux_checkreqprot) prot = reqprot; @@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = { .file_alloc_security = selinux_file_alloc_security, .file_free_security = selinux_file_free_security, .file_ioctl = selinux_file_ioctl, - .file_mmap = selinux_file_mmap, + .mmap_file = selinux_mmap_file, + .mmap_addr = selinux_mmap_addr, .file_mprotect = selinux_file_mprotect, .file_lock = selinux_file_lock, .file_fcntl = selinux_file_fcntl, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a6219771876..ee0bb5735f3 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_file_mmap : + * smack_mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, * @flags contains the operational flags. * Return 0 if permission is granted. */ -static int smack_file_mmap(struct file *file, +static int smack_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only) + unsigned long flags) { struct smack_known *skp; struct smack_rule *srp; @@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file, int tmay; int rc; - /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - if (file == NULL || file->f_dentry == NULL) return 0; @@ -3482,7 +3476,8 @@ struct security_operations smack_ops = { .file_ioctl = smack_file_ioctl, .file_lock = smack_file_lock, .file_fcntl = smack_file_fcntl, - .file_mmap = smack_file_mmap, + .mmap_file = smack_mmap_file, + .mmap_addr = cap_mmap_addr, .file_set_fowner = smack_file_set_fowner, .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, -- cgit v1.2.3-70-g09d2 From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:11:23 -0400 Subject: take security_mmap_file() outside of ->mmap_sem Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- ipc/shm.c | 5 +++++ mm/mmap.c | 23 ++++++++++++----------- mm/nommu.c | 22 ++++++++++++---------- security/security.c | 33 ++++++++++++++++++++++++++++++--- 5 files changed, 62 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f1bae0963dd..4e5a73cdbbe 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, +static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 406c5b20819..e3a8063b176 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) sfd->file = shp->shm_file; sfd->vm_ops = NULL; + err = security_mmap_file(file, prot, flags); + if (err) + goto out_fput; + down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; @@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) invalid: up_write(¤t->mm->mmap_sem); +out_fput: fput(file); out_nattch: diff --git a/mm/mmap.c b/mm/mmap.c index 49283da9a2a..34b280f4238 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -979,7 +979,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct inode *inode; vm_flags_t vm_flags; int error; - unsigned long reqprot = prot; /* * Does the application expect PROT_READ to imply PROT_EXEC? @@ -1105,10 +1104,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (error) return error; - error = security_mmap_file(file, reqprot, prot, flags); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1130,9 +1125,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1168,9 +1166,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + retval = security_mmap_file(file, prot, flags); + if (!retval) { + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + } if (file) fput(file); diff --git a/mm/nommu.c b/mm/nommu.c index acfe419785d..8cbfd623b04 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file, unsigned long *_capabilities) { unsigned long capabilities, rlen; - unsigned long reqprot = prot; int ret; /* do the simple checks first */ @@ -1048,9 +1047,6 @@ static int validate_mmap_request(struct file *file, /* allow the security API to have its say */ ret = security_mmap_addr(addr); - if (ret < 0) - return ret; - ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; @@ -1492,9 +1488,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1515,9 +1514,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + ret = security_mmap_file(file, prot, flags); + if (!ret) { + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + } if (file) fput(file); diff --git a/security/security.c b/security/security.c index d91c66d3956..3b11b3b72fe 100644 --- a/security/security.c +++ b/security/security.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -657,11 +660,35 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) { + unsigned long reqprot = prot; int ret; - + /* + * Does the application expect PROT_READ to imply PROT_EXEC? + * + * (the exception is when the underlying filesystem is noexec + * mounted, in which case we dont add PROT_EXEC.) + */ + if (!(reqprot & PROT_READ)) + goto out; + if (!(current->personality & READ_IMPLIES_EXEC)) + goto out; + if (!file) { + prot |= PROT_EXEC; + } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { +#ifndef CONFIG_MMU + unsigned long caps = 0; + struct address_space *mapping = file->f_mapping; + if (mapping && mapping->backing_dev_info) + caps = mapping->backing_dev_info->capabilities; + if (!(caps & BDI_CAP_EXEC_MAP)) + goto out; +#endif + prot |= PROT_EXEC; + } +out: ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:08:42 -0400 Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static after all, 0 bytes and 0 pages is the same thing... Signed-off-by: Al Viro --- fs/aio.c | 6 +++--- include/linux/mm.h | 2 +- ipc/shm.c | 2 +- mm/mmap.c | 4 ++-- mm/nommu.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index e7f2fad7b4c..07154d99cc6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->mmap_size = nr_pages * PAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, - 0); + info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f24c6..4189e0d0ac0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap(struct file *, unsigned long, +extern unsigned long do_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); diff --git a/ipc/shm.c b/ipc/shm.c index e3a8063b176..5e2cbfdab6f 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1054,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = do_mmap (file, addr, size, prot, flags, 0); + user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); *raddr = user_addr; err = 0; if (IS_ERR_VALUE(user_addr)) diff --git a/mm/mmap.c b/mm/mmap.c index 131521e12f1..f7786542c59 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -971,7 +971,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) * The caller must hold down_write(¤t->mm->mmap_sem). */ -static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -1102,7 +1102,7 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { diff --git a/mm/nommu.c b/mm/nommu.c index 8cbfd623b04..a1792ed2cb1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1232,7 +1232,7 @@ enomem: /* * handle mapping creation for uClinux */ -static unsigned long do_mmap_pgoff(struct file *file, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1470,7 +1470,7 @@ error_getting_region: return -ENOMEM; } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { -- cgit v1.2.3-70-g09d2 From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:59:21 -0400 Subject: fs: introduce inode operation ->update_time Btrfs has to make sure we have space to allocate new blocks in order to modify the inode, so updating time can fail. We've gotten around this by having our own file_update_time but this is kind of a pain, and Christoph has indicated he would like to make xfs do something different with atime updates. So introduce ->update_time, where we will deal with i_version an a/m/c time updates and indicate which changes need to be made. The normal version just does what it has always done, updates the time and marks the inode dirty, and then filesystems can choose to do something different. I've gone through all of the users of file_update_time and made them check for errors with the exception of the fault code since it's complicated and I wasn't quite sure what to do there, also Jan is going to be pushing the file time updates into page_mkwrite for those who have it so that should satisfy btrfs and make it not a big deal to check the file_update_time() return code in the generic fault path. Thanks, Signed-off-by: Josef Bacik --- Documentation/filesystems/Locking | 3 +++ Documentation/filesystems/vfs.txt | 4 +++ fs/fuse/file.c | 4 ++- fs/inode.c | 56 ++++++++++++++++++++++++++++----------- fs/ncpfs/file.c | 6 +++-- fs/ntfs/file.c | 4 ++- fs/pipe.c | 7 +++-- fs/splice.c | 6 +++-- fs/xfs/xfs_file.c | 7 +++-- include/linux/fs.h | 10 ++++++- mm/filemap.c | 4 ++- mm/filemap_xip.c | 4 ++- 12 files changed, 86 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4fca82e5276..d5a269a51a9 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + void (*update_time)(struct inode *, struct timespec *, int); locking rules: all may block @@ -89,6 +90,8 @@ listxattr: no removexattr: yes truncate_range: yes fiemap: no +update_time: no + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 0d049202808..b2aa722e5ea 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + void (*update_time)(struct inode *, struct timespec *, int); }; Again, all methods are called without any locks being held, unless @@ -475,6 +476,9 @@ otherwise noted. truncate_range: a method provided by the underlying filesystem to truncate a range of blocks , i.e. punch a hole somewhere in a file. + update_time: called by the VFS to update a specific time or the i_version of + an inode. If this is not defined the VFS will update the inode itself + and call mark_inode_dirty_sync. The Address Space Object ======================== diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b7fd7..9562109d3a8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; if (file->f_flags & O_DIRECT) { written = generic_file_direct_write(iocb, iov, &nr_segs, diff --git a/fs/inode.c b/fs/inode.c index a79555e492e..f0335fc315e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1487,6 +1487,27 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } +/* + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this. + */ +static int update_time(struct inode *inode, struct timespec *time, int flags) +{ + if (inode->i_op->update_time) + return inode->i_op->update_time(inode, time, flags); + + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_VERSION) + inode_inc_iversion(inode); + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + mark_inode_dirty_sync(inode); + return 0; +} + /** * touch_atime - update the access time * @path: the &struct path to update @@ -1524,8 +1545,14 @@ void touch_atime(struct path *path) if (mnt_want_write(mnt)) return; - inode->i_atime = now; - mark_inode_dirty_sync(inode); + /* + * File systems can error out when updating inodes if they need to + * allocate new space to modify an inode (such is the case for + * Btrfs), but since we touch atime while walking down the path we + * really don't care if we failed to update the atime of the file, + * so just ignore the return value. + */ + update_time(inode, &now, S_ATIME); mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1604,18 +1631,20 @@ EXPORT_SYMBOL(file_remove_suid); * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. + * timestamps are handled by the server. This can return an error for + * file systems who need to allocate space in order to update an inode. */ -void file_update_time(struct file *file) +int file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + int sync_it = 0; + int ret; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) - return; + return 0; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) @@ -1628,21 +1657,16 @@ void file_update_time(struct file *file) sync_it |= S_VERSION; if (!sync_it) - return; + return 0; /* Finally allowed to write? Takes lock. */ if (mnt_want_write_file(file)) - return; + return 0; - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) - inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - mark_inode_dirty_sync(inode); + ret = update_time(inode, &now, sync_it); mnt_drop_write_file(file); + + return ret; } EXPORT_SYMBOL(file_update_time); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3ff5fcc1528..122e260247f 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * already_written = 0; + errno = file_update_time(file); + if (errno) + goto outrel; + bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ @@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - file_update_time(file); - *ppos = pos; if (pos > i_size_read(inode)) { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8639169221c..7389d2d5e51 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = file_remove_suid(file); if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: diff --git a/fs/pipe.c b/fs/pipe.c index 95ebb56de49..49c1065256f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -654,8 +654,11 @@ out: wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) - file_update_time(filp); + if (ret > 0) { + int err = file_update_time(filp); + if (err) + ret = err; + } return ret; } diff --git a/fs/splice.c b/fs/splice.c index f8476841eb0..47c4c1ad0c0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1003,8 +1003,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) { - file_update_time(out); - ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + ret = file_update_time(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, + pipe_to_file); } mutex_unlock(&inode->i_mutex); } while (ret > 0); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d214b87f6b..9f7ec15a652 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -586,8 +586,11 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); + if (likely(!(file->f_mode & FMODE_NOCMTIME))) { + error = file_update_time(file); + if (error) + return error; + } /* * If we're writing the file then make sure to clear the setuid and diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a963094..57fc70574d2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1684,6 +1684,7 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } +enum file_time_flags { + S_ATIME = 1, + S_MTIME = 2, + S_CTIME = 4, + S_VERSION = 8, +}; + extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern void file_update_time(struct file *file); +extern int file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); diff --git a/mm/filemap.c b/mm/filemap.c index 21e5abfbcdf..51070f1f1b5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2463,7 +2463,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a4eb3113222..213ca1f5340 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - file_update_time(filp); + ret = file_update_time(filp); + if (ret) + goto out_backing; ret = __xip_file_write (filp, buf, count, pos, ppos); -- cgit v1.2.3-70-g09d2 From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:19 +0200 Subject: vfs: retry last component if opening stale dentry NFS optimizes away d_revalidates for last component of open. This means that open itself can find the dentry stale. This patch allows the filesystem to return EOPENSTALE and the VFS will retry the lookup on just the last component if possible. If the lookup was done using RCU mode, including the last component, then this is not possible since the parent dentry is lost. In this case fall back to non-RCU lookup. Currently this is not used since NFS will always leave RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 37 +++++++++++++++++++++++++++++++++++-- include/linux/errno.h | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 998d5316921..7d694194024 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2202,6 +2202,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, struct file *filp; struct inode *inode; int symlink_ok = 0; + struct path save_parent = { .dentry = NULL, .mnt = NULL }; + bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2267,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) goto exit; +retry_lookup: mutex_lock(&dir->d_inode->i_mutex); dentry = lookup_hash(nd); @@ -2349,12 +2352,21 @@ finish_lookup: return NULL; } - path_to_nameidata(path, nd); + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { + path_to_nameidata(path, nd); + } else { + save_parent.dentry = nd->path.dentry; + save_parent.mnt = mntget(path->mnt); + nd->path.dentry = path->dentry; + + } nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); - if (error) + if (error) { + path_put(&save_parent); return ERR_PTR(error); + } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2377,6 +2389,20 @@ common: if (error) goto exit; filp = nameidata_to_filp(nd); + if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + retried = true; + goto retry_lookup; + } if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { @@ -2396,6 +2422,7 @@ common: out: if (want_write) mnt_drop_write(nd->path.mnt); + path_put(&save_parent); terminate_walk(nd); return filp; @@ -2459,6 +2486,12 @@ out: if (base) fput(base); release_open_intent(nd); + if (filp == ERR_PTR(-EOPENSTALE)) { + if (flags & LOOKUP_RCU) + filp = ERR_PTR(-ECHILD); + else + filp = ERR_PTR(-ESTALE); + } return filp; out_filp: diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa5c26..e0de516374d 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -17,6 +17,7 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ -- cgit v1.2.3-70-g09d2